From python-checkins at python.org Sun Jan 1 02:19:28 2006 From: python-checkins at python.org (tim.peters) Date: Sun, 1 Jan 2006 02:19:28 +0100 (CET) Subject: [Python-checkins] commit of r41860 - python/trunk/Objects/dictobject.c Message-ID: <20060101011928.76D481E4002@bag.python.org> Author: tim.peters Date: Sun Jan 1 02:19:23 2006 New Revision: 41860 Modified: python/trunk/Objects/dictobject.c Log: Fixed English in a comment; trimmed trailing whitespace; no code changes. Modified: python/trunk/Objects/dictobject.c ============================================================================== --- python/trunk/Objects/dictobject.c (original) +++ python/trunk/Objects/dictobject.c Sun Jan 1 02:19:23 2006 @@ -2,7 +2,7 @@ /* Dictionary object implementation using a hash table */ /* The distribution includes a separate file, Objects/dictnotes.txt, - describing explorations into dictionary design and optimization. + describing explorations into dictionary design and optimization. It covers typical dictionary use patterns, the parameters for tuning dictionaries, and several ideas for possible optimizations. */ @@ -519,10 +519,10 @@ } /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the - * dictionary if it is merely replacing the value for an existing key. - * This is means that it's safe to loop over a dictionary with - * PyDict_Next() and occasionally replace a value -- but you can't - * insert new keys or remove them. + * dictionary if it's merely replacing the value for an existing key. + * This means that it's safe to loop over a dictionary with PyDict_Next() + * and occasionally replace a value -- but you can't insert new keys or + * remove them. */ int PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) @@ -554,15 +554,15 @@ /* If we added a key, we can safely resize. Otherwise just return! * If fill >= 2/3 size, adjust size. 
Normally, this doubles or * quaduples the size, but it's also possible for the dict to shrink - * (if ma_fill is much larger than ma_used, meaning a lot of dict + * (if ma_fill is much larger than ma_used, meaning a lot of dict * keys have been * deleted). - * + * * Quadrupling the size improves average dictionary sparseness * (reducing collisions) at the cost of some memory and iteration * speed (which loops over every possible entry). It also halves * the number of expensive resize operations in a growing dictionary. - * - * Very large dictionaries (over 50K items) use doubling instead. + * + * Very large dictionaries (over 50K items) use doubling instead. * This may help applications with severe memory constraints. */ if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2)) @@ -734,7 +734,7 @@ PyMem_DEL(mp->ma_table); if (num_free_dicts < MAXFREEDICTS && mp->ob_type == &PyDict_Type) free_dicts[num_free_dicts++] = mp; - else + else mp->ob_type->tp_free((PyObject *)mp); Py_TRASHCAN_SAFE_END(mp) } @@ -2251,7 +2251,7 @@ Py_DECREF(PyTuple_GET_ITEM(result, 1)); } else { result = PyTuple_New(2); - if (result == NULL) + if (result == NULL) return NULL; } di->len--; From python-checkins at python.org Sun Jan 1 22:34:06 2006 From: python-checkins at python.org (barry.warsaw) Date: Sun, 1 Jan 2006 22:34:06 +0100 (CET) Subject: [Python-checkins] commit of r41861 - in python/branches/release24-maint: Misc/NEWS Tools/pynche/StripViewer.py Message-ID: <20060101213406.14A791E4002@bag.python.org> Author: barry.warsaw Date: Sun Jan 1 22:33:50 2006 New Revision: 41861 Modified: python/branches/release24-maint/Misc/NEWS python/branches/release24-maint/Tools/pynche/StripViewer.py Log: Patch by Ori Avtalion to fix a minor display glitch in the RightArrow. I will port forward to 2.5. 
Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Sun Jan 1 22:33:50 2006 @@ -80,6 +80,11 @@ - Bug #1296004: httplib.py: Limit maximal amount of data read from the socket to avoid a MemoryError on Windows. +Tools/Demos +----------- + +- Fixed a display glitch in Pynche, which could cause the right arrow to + wiggle over by a pixel. What's New in Python 2.4.2 final? ================================= Modified: python/branches/release24-maint/Tools/pynche/StripViewer.py ============================================================================== --- python/branches/release24-maint/Tools/pynche/StripViewer.py (original) +++ python/branches/release24-maint/Tools/pynche/StripViewer.py Sun Jan 1 22:33:50 2006 @@ -151,9 +151,9 @@ return arrow, text def _x(self): - coords = self._canvas.bbox(self._TAG) + coords = self._canvas.coords(self._TAG) assert coords - return coords[2] - 6 # BAW: kludge + return coords[0] + self._ARROWWIDTH From python-checkins at python.org Sun Jan 1 22:35:24 2006 From: python-checkins at python.org (reinhold.birkenfeld) Date: Sun, 1 Jan 2006 22:35:24 +0100 (CET) Subject: [Python-checkins] commit of r41862 - python/trunk/Doc/lib/liblogging.tex python/trunk/Doc/lib/libstdtypes.tex python/trunk/Doc/lib/libsubprocess.tex Message-ID: <20060101213524.C657A1E4002@bag.python.org> Author: reinhold.birkenfeld Date: Sun Jan 1 22:35:20 2006 New Revision: 41862 Modified: python/trunk/Doc/lib/liblogging.tex python/trunk/Doc/lib/libstdtypes.tex python/trunk/Doc/lib/libsubprocess.tex Log: Bug #1394868: doc typos Modified: python/trunk/Doc/lib/liblogging.tex ============================================================================== --- python/trunk/Doc/lib/liblogging.tex (original) +++ python/trunk/Doc/lib/liblogging.tex Sun Jan 1 22:35:20 2006 @@ -59,7 +59,7 @@ logging output. 
Logging messages are encoded as instances of the \class{LogRecord} class. -When a logger decides to actually log an event, an \class{LogRecord} +When a logger decides to actually log an event, a \class{LogRecord} instance is created from the logging message. Logging messages are subjected to a dispatch mechanism through the Modified: python/trunk/Doc/lib/libstdtypes.tex ============================================================================== --- python/trunk/Doc/lib/libstdtypes.tex (original) +++ python/trunk/Doc/lib/libstdtypes.tex Sun Jan 1 22:35:20 2006 @@ -952,8 +952,8 @@ precede the conversion (overrides a "space" flag).} \end{tableii} -The length modifier may be \code{h}, \code{l}, and \code{L} may be -present, but are ignored as they are not necessary for Python. +A length modifier (\code{h}, \code{l}, or \code{L}) may be +present, but is ignored as it is not necessary for Python. The conversion types are: @@ -1606,7 +1606,7 @@ defaults to the current position. The current file position is not changed. Note that if a specified size exceeds the file's current size, the result is platform-dependent: possibilities - include that file may remain unchanged, increase to the specified + include that the file may remain unchanged, increase to the specified size as if zero-filled, or increase to the specified size with undefined new content. Availability: Windows, many \UNIX{} variants. Modified: python/trunk/Doc/lib/libsubprocess.tex ============================================================================== --- python/trunk/Doc/lib/libsubprocess.tex (original) +++ python/trunk/Doc/lib/libsubprocess.tex Sun Jan 1 22:35:20 2006 @@ -103,7 +103,7 @@ for the new process. 
If \var{universal_newlines} is \constant{True}, the file objects stdout -and stderr are opened as a text files, but lines may be terminated by +and stderr are opened as text files, but lines may be terminated by any of \code{'\e n'}, the Unix end-of-line convention, \code{'\e r'}, the Macintosh convention or \code{'\e r\e n'}, the Windows convention. All of these external representations are seen as \code{'\e n'} by the From python-checkins at python.org Sun Jan 1 22:35:48 2006 From: python-checkins at python.org (reinhold.birkenfeld) Date: Sun, 1 Jan 2006 22:35:48 +0100 (CET) Subject: [Python-checkins] commit of r41863 - python/branches/release24-maint/Doc/lib/liblogging.tex python/branches/release24-maint/Doc/lib/libstdtypes.tex python/branches/release24-maint/Doc/lib/libsubprocess.tex Message-ID: <20060101213548.04B571E4002@bag.python.org> Author: reinhold.birkenfeld Date: Sun Jan 1 22:35:41 2006 New Revision: 41863 Modified: python/branches/release24-maint/Doc/lib/liblogging.tex python/branches/release24-maint/Doc/lib/libstdtypes.tex python/branches/release24-maint/Doc/lib/libsubprocess.tex Log: Bug #1394868: doc typos Modified: python/branches/release24-maint/Doc/lib/liblogging.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/liblogging.tex (original) +++ python/branches/release24-maint/Doc/lib/liblogging.tex Sun Jan 1 22:35:41 2006 @@ -59,7 +59,7 @@ logging output. Logging messages are encoded as instances of the \class{LogRecord} class. -When a logger decides to actually log an event, an \class{LogRecord} +When a logger decides to actually log an event, a \class{LogRecord} instance is created from the logging message. 
Logging messages are subjected to a dispatch mechanism through the Modified: python/branches/release24-maint/Doc/lib/libstdtypes.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libstdtypes.tex (original) +++ python/branches/release24-maint/Doc/lib/libstdtypes.tex Sun Jan 1 22:35:41 2006 @@ -952,8 +952,8 @@ precede the conversion (overrides a "space" flag).} \end{tableii} -The length modifier may be \code{h}, \code{l}, and \code{L} may be -present, but are ignored as they are not necessary for Python. +A length modifier (\code{h}, \code{l}, or \code{L}) may be +present, but is ignored as it is not necessary for Python. The conversion types are: @@ -1606,7 +1606,7 @@ defaults to the current position. The current file position is not changed. Note that if a specified size exceeds the file's current size, the result is platform-dependent: possibilities - include that file may remain unchanged, increase to the specified + include that the file may remain unchanged, increase to the specified size as if zero-filled, or increase to the specified size with undefined new content. Availability: Windows, many \UNIX{} variants. Modified: python/branches/release24-maint/Doc/lib/libsubprocess.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libsubprocess.tex (original) +++ python/branches/release24-maint/Doc/lib/libsubprocess.tex Sun Jan 1 22:35:41 2006 @@ -103,7 +103,7 @@ for the new process. If \var{universal_newlines} is \constant{True}, the file objects stdout -and stderr are opened as a text files, but lines may be terminated by +and stderr are opened as text files, but lines may be terminated by any of \code{'\e n'}, the Unix end-of-line convention, \code{'\e r'}, the Macintosh convention or \code{'\e r\e n'}, the Windows convention. 
All of these external representations are seen as \code{'\e n'} by the From python-checkins at python.org Sun Jan 1 22:49:00 2006 From: python-checkins at python.org (barry.warsaw) Date: Sun, 1 Jan 2006 22:49:00 +0100 (CET) Subject: [Python-checkins] commit of r41864 - in python/trunk: Misc/NEWS Tools/pynche/StripViewer.py Message-ID: <20060101214900.0FC651E400C@bag.python.org> Author: barry.warsaw Date: Sun Jan 1 22:48:54 2006 New Revision: 41864 Modified: python/trunk/Misc/NEWS python/trunk/Tools/pynche/StripViewer.py Log: Ported from 2.4 branch: Patch by Ori Avtalion to fix a minor display glitch in the RightArrow. Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Sun Jan 1 22:48:54 2006 @@ -593,8 +593,9 @@ once when a size argument is given. This prevents a buffer overflow in the tokenizer with very long source lines. -- Bug #1083110: ``zlib.decompress.flush()`` would segfault if called immediately - after creating the object, without any intervening ``.decompress()`` calls. +- Bug #1083110: ``zlib.decompress.flush()`` would segfault if called + immediately after creating the object, without any intervening + ``.decompress()`` calls. - The reconvert.quote function can now emit triple-quoted strings. The reconvert module now has some simple documentation. @@ -739,6 +740,8 @@ - Patch #1177597: Correct Complex.__init__. +- Fixed a display glitch in Pynche, which could cause the right arrow to + wiggle over by a pixel. What's New in Python 2.4 final? 
=============================== Modified: python/trunk/Tools/pynche/StripViewer.py ============================================================================== --- python/trunk/Tools/pynche/StripViewer.py (original) +++ python/trunk/Tools/pynche/StripViewer.py Sun Jan 1 22:48:54 2006 @@ -151,9 +151,9 @@ return arrow, text def _x(self): - coords = self._canvas.bbox(self._TAG) + coords = self._canvas.coords(self._TAG) assert coords - return coords[2] - 6 # BAW: kludge + return coords[0] + self._ARROWWIDTH From python-checkins at python.org Mon Jan 2 03:46:57 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 2 Jan 2006 03:46:57 +0100 (CET) Subject: [Python-checkins] commit of r41865 - python/trunk/Objects/typeobject.c Message-ID: <20060102024657.45C681E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 2 03:46:54 2006 New Revision: 41865 Modified: python/trunk/Objects/typeobject.c Log: Fix ref/memory leak introduced in rev 41845. Modified: python/trunk/Objects/typeobject.c ============================================================================== --- python/trunk/Objects/typeobject.c (original) +++ python/trunk/Objects/typeobject.c Mon Jan 2 03:46:54 2006 @@ -1326,6 +1326,7 @@ PyErr_Format(PyExc_TypeError, "mro() returned a non-class ('%.500s')", cls->ob_type->tp_name); + Py_DECREF(tuple); return -1; } t = (PyTypeObject*)cls; @@ -1333,6 +1334,7 @@ PyErr_Format(PyExc_TypeError, "mro() returned base with unsuitable layout ('%.500s')", t->tp_name); + Py_DECREF(tuple); return -1; } } From python-checkins at python.org Mon Jan 2 08:22:14 2006 From: python-checkins at python.org (fred.drake) Date: Mon, 2 Jan 2006 08:22:14 +0100 (CET) Subject: [Python-checkins] commit of r41866 - python/trunk/Doc/tools/cvsinfo.py python/trunk/Doc/tools/findacks python/trunk/Doc/tools/mksourcepkg Message-ID: <20060102072214.C55ED1E4002@bag.python.org> Author: fred.drake Date: Mon Jan 2 08:22:12 2006 New Revision: 41866 Removed: python/trunk/Doc/tools/cvsinfo.py 
python/trunk/Doc/tools/findacks Modified: python/trunk/Doc/tools/mksourcepkg Log: update to reflect move to Subversion Deleted: /python/trunk/Doc/tools/cvsinfo.py ============================================================================== --- /python/trunk/Doc/tools/cvsinfo.py Mon Jan 2 08:22:12 2006 +++ (empty file) @@ -1,81 +0,0 @@ -"""Utility class and function to get information about the CVS repository -based on checked-out files. -""" - -import os - - -def get_repository_list(paths): - d = {} - for name in paths: - if os.path.isfile(name): - dir = os.path.dirname(name) - else: - dir = name - rootfile = os.path.join(name, "CVS", "Root") - root = open(rootfile).readline().strip() - if not d.has_key(root): - d[root] = RepositoryInfo(dir), [name] - else: - d[root][1].append(name) - return d.values() - - -class RepositoryInfo: - """Record holding information about the repository we want to talk to.""" - cvsroot_path = None - branch = None - - # type is '', ':ext', or ':pserver:' - type = "" - - def __init__(self, dir=None): - if dir is None: - dir = os.getcwd() - dir = os.path.join(dir, "CVS") - root = open(os.path.join(dir, "Root")).readline().strip() - if root.startswith(":pserver:"): - self.type = ":pserver:" - root = root[len(":pserver:"):] - elif ":" in root: - if root.startswith(":ext:"): - root = root[len(":ext:"):] - self.type = ":ext:" - self.repository = root - if ":" in root: - host, path = root.split(":", 1) - self.cvsroot_path = path - else: - self.cvsroot_path = root - fn = os.path.join(dir, "Tag") - if os.path.isfile(fn): - self.branch = open(fn).readline().strip()[1:] - - def get_cvsroot(self): - return self.type + self.repository - - _repository_dir_cache = {} - - def get_repository_file(self, path): - filename = os.path.abspath(path) - if os.path.isdir(path): - dir = path - join = 0 - else: - dir = os.path.dirname(path) - join = 1 - try: - repodir = self._repository_dir_cache[dir] - except KeyError: - repofn = os.path.join(dir, "CVS", 
"Repository") - repodir = open(repofn).readline().strip() - repodir = os.path.join(self.cvsroot_path, repodir) - self._repository_dir_cache[dir] = repodir - if join: - fn = os.path.join(repodir, os.path.basename(path)) - else: - fn = repodir - return fn[len(self.cvsroot_path)+1:] - - def __repr__(self): - return "" % self.get_cvsroot() Deleted: /python/trunk/Doc/tools/findacks ============================================================================== --- /python/trunk/Doc/tools/findacks Mon Jan 2 08:22:12 2006 +++ (empty file) @@ -1,161 +0,0 @@ -#!/usr/bin/env python -"""Script to locate email addresses in the CVS logs.""" -__version__ = '$Revision$' - -import os -import re -import sys -import UserDict - -import cvsinfo - - -class Acknowledgements(UserDict.UserDict): - def add(self, email, name, path): - d = self.data - d.setdefault(email, {})[path] = name - - -def open_cvs_log(info, paths=None): - cvsroot = info.get_cvsroot() - cmd = "cvs -q -d%s log " % cvsroot - if paths: - cmd += " ".join(paths) - return os.popen(cmd, "r") - - -email_rx = re.compile("<([a-z][-a-z0-9._]*@[-a-z0-9.]+)>", re.IGNORECASE) - -def find_acks(f, acks): - prev = '' - filename = None - MAGIC_WORDS = ('van', 'von') - while 1: - line = f.readline() - if not line: - break - if line.startswith("Working file: "): - filename = line.split(None, 2)[2].strip() - prev = line - continue - m = email_rx.search(line) - if m: - words = prev.split() + line[:m.start()].split() - L = [] - while words \ - and (words[-1][0].isupper() or words[-1] in MAGIC_WORDS): - L.insert(0, words.pop()) - name = " ".join(L) - email = m.group(1).lower() - acks.add(email, name, filename) - prev = line - - -def load_cvs_log_acks(acks, args): - repolist = cvsinfo.get_repository_list(args or [""]) - for info, paths in repolist: - print >>sys.stderr, "Repository:", info.get_cvsroot() - f = open_cvs_log(info, paths) - find_acks(f, acks) - f.close() - - -def load_tex_source_acks(acks, args): - for path in args: - path = path 
or os.curdir - if os.path.isfile(path): - read_acks_from_tex_file(acks, path) - else: - read_acks_from_tex_dir(acks, path) - - -def read_acks_from_tex_file(acks, path): - f = open(path) - while 1: - line = f.readline() - if not line: - break - if line.startswith(r"\sectionauthor{"): - line = line[len(r"\sectionauthor"):] - name, line = extract_tex_group(line) - email, line = extract_tex_group(line) - acks.add(email, name, path) - - -def read_acks_from_tex_dir(acks, path): - stack = [path] - while stack: - p = stack.pop() - for n in os.listdir(p): - n = os.path.join(p, n) - if os.path.isdir(n): - stack.insert(0, n) - elif os.path.normpath(n).endswith(".tex"): - read_acks_from_tex_file(acks, n) - - -def extract_tex_group(s): - c = 0 - for i in range(len(s)): - if s[i] == '{': - c += 1 - elif s[i] == '}': - c -= 1 - if c == 0: - return s[1:i], s[i+1:] - - -def print_acks(acks): - first = 1 - for email, D in acks.items(): - if first: - first = 0 - else: - print - L = D.items() - L.sort() - prefname = L[0][1] - for file, name in L[1:]: - if name != prefname: - prefname = "" - break - if prefname: - print prefname, "<%s>:" % email - else: - print email + ":" - for file, name in L: - if name == prefname: - print " " + file - else: - print " %s (as %s)" % (file, name) - - -def print_ack_names(acks): - names = [] - for email, D in acks.items(): - L = D.items() - L.sort() - prefname = L[0][1] - for file, name in L[1:]: - prefname = prefname or name - names.append(prefname or email) - def f(s1, s2): - s1 = s1.lower() - s2 = s2.lower() - return cmp((s1.split()[-1], s1), - (s2.split()[-1], s2)) - names.sort(f) - for name in names: - print name - - -def main(): - args = sys.argv[1:] - acks = Acknowledgements() - load_cvs_log_acks(acks, args) - load_tex_source_acks(acks, args) - print_ack_names(acks) - - -if __name__ == "__main__": - main() Modified: python/trunk/Doc/tools/mksourcepkg ============================================================================== --- 
python/trunk/Doc/tools/mksourcepkg (original) +++ python/trunk/Doc/tools/mksourcepkg Mon Jan 2 08:22:12 2006 @@ -24,8 +24,6 @@ import sys import tempfile -import cvsinfo - try: __file__ except NameError: @@ -79,42 +77,22 @@ else: formats = ["gzip"] release = args[0] - cvstag = None + svntag = None if len(args) > 1: - cvstag = args[1] + svntag = args[1] tempdir = tempfile.mktemp() os.mkdir(tempdir) pkgdir = os.path.join(tempdir, "Python-Docs-" + release) - os.mkdir(pkgdir) pwd = os.getcwd() mydir = os.path.abspath(os.path.dirname(sys.argv[0])) - info = cvsinfo.RepositoryInfo(mydir) - cvsroot = info.get_cvsroot() - m = rx.match(cvsroot) - if m and anonymous: - # If this is an authenticated SourceForge repository, convert to - # anonymous usage for the export/checkout, since that avoids the - # SSH overhead. - group = m.group(1) - cvsroot = ":pserver:anonymous at cvs.%s.sourceforge.net:/cvsroot/%s" \ - % (group, group) - # For some reason, SourceForge/CVS doesn't seem to care that we - # might not have done a "cvs login" to the anonymous server. - # That avoids a lot of painful gunk here. 
os.chdir(tempdir) if not quiet: - print "--- current directory is:", pkgdir - if cvstag: - run("cvs -d%s export -r %s -d Python-Docs-%s python/dist/src/Doc" - % (cvsroot, cvstag, release)) - else: - run("cvs -Q -d%s checkout -d Python-Docs-%s python/dist/src/Doc" - % (cvsroot, release)) - # remove CVS directories - for p in ('*/CVS', '*/*/CVS', '*/*/*/CVS'): - map(shutil.rmtree, glob.glob(p)) - for f in ('.cvsignore', '*/.cvsignore'): - map(os.unlink, glob.glob(f)) + print "--- current directory is:", tempdir + if not svntag: + svntag = "trunk" + svnbase = "http://svn.python.org/projects/python" + run("svn export %s/%s/Doc Python-Docs-%s" + % (svnbase, svntag, release)) # Copy in the version informtation, if we're not just going to # rip it back out: From python-checkins at python.org Mon Jan 2 08:25:08 2006 From: python-checkins at python.org (fred.drake) Date: Mon, 2 Jan 2006 08:25:08 +0100 (CET) Subject: [Python-checkins] commit of r41867 - python/branches/release24-maint/Doc/tools/cvsinfo.py python/branches/release24-maint/Doc/tools/findacks python/branches/release24-maint/Doc/tools/mksourcepkg Message-ID: <20060102072508.CA0691E4002@bag.python.org> Author: fred.drake Date: Mon Jan 2 08:25:08 2006 New Revision: 41867 Removed: python/branches/release24-maint/Doc/tools/cvsinfo.py python/branches/release24-maint/Doc/tools/findacks Modified: python/branches/release24-maint/Doc/tools/mksourcepkg Log: merge revision 41866 from trunk: update to reflect move to Subversion Deleted: /python/branches/release24-maint/Doc/tools/cvsinfo.py ============================================================================== --- /python/branches/release24-maint/Doc/tools/cvsinfo.py Mon Jan 2 08:25:08 2006 +++ (empty file) @@ -1,81 +0,0 @@ -"""Utility class and function to get information about the CVS repository -based on checked-out files. 
-""" - -import os - - -def get_repository_list(paths): - d = {} - for name in paths: - if os.path.isfile(name): - dir = os.path.dirname(name) - else: - dir = name - rootfile = os.path.join(name, "CVS", "Root") - root = open(rootfile).readline().strip() - if not d.has_key(root): - d[root] = RepositoryInfo(dir), [name] - else: - d[root][1].append(name) - return d.values() - - -class RepositoryInfo: - """Record holding information about the repository we want to talk to.""" - cvsroot_path = None - branch = None - - # type is '', ':ext', or ':pserver:' - type = "" - - def __init__(self, dir=None): - if dir is None: - dir = os.getcwd() - dir = os.path.join(dir, "CVS") - root = open(os.path.join(dir, "Root")).readline().strip() - if root.startswith(":pserver:"): - self.type = ":pserver:" - root = root[len(":pserver:"):] - elif ":" in root: - if root.startswith(":ext:"): - root = root[len(":ext:"):] - self.type = ":ext:" - self.repository = root - if ":" in root: - host, path = root.split(":", 1) - self.cvsroot_path = path - else: - self.cvsroot_path = root - fn = os.path.join(dir, "Tag") - if os.path.isfile(fn): - self.branch = open(fn).readline().strip()[1:] - - def get_cvsroot(self): - return self.type + self.repository - - _repository_dir_cache = {} - - def get_repository_file(self, path): - filename = os.path.abspath(path) - if os.path.isdir(path): - dir = path - join = 0 - else: - dir = os.path.dirname(path) - join = 1 - try: - repodir = self._repository_dir_cache[dir] - except KeyError: - repofn = os.path.join(dir, "CVS", "Repository") - repodir = open(repofn).readline().strip() - repodir = os.path.join(self.cvsroot_path, repodir) - self._repository_dir_cache[dir] = repodir - if join: - fn = os.path.join(repodir, os.path.basename(path)) - else: - fn = repodir - return fn[len(self.cvsroot_path)+1:] - - def __repr__(self): - return "" % self.get_cvsroot() Deleted: /python/branches/release24-maint/Doc/tools/findacks 
============================================================================== --- /python/branches/release24-maint/Doc/tools/findacks Mon Jan 2 08:25:08 2006 +++ (empty file) @@ -1,161 +0,0 @@ -#!/usr/bin/env python -"""Script to locate email addresses in the CVS logs.""" -__version__ = '$Revision$' - -import os -import re -import sys -import UserDict - -import cvsinfo - - -class Acknowledgements(UserDict.UserDict): - def add(self, email, name, path): - d = self.data - d.setdefault(email, {})[path] = name - - -def open_cvs_log(info, paths=None): - cvsroot = info.get_cvsroot() - cmd = "cvs -q -d%s log " % cvsroot - if paths: - cmd += " ".join(paths) - return os.popen(cmd, "r") - - -email_rx = re.compile("<([a-z][-a-z0-9._]*@[-a-z0-9.]+)>", re.IGNORECASE) - -def find_acks(f, acks): - prev = '' - filename = None - MAGIC_WORDS = ('van', 'von') - while 1: - line = f.readline() - if not line: - break - if line.startswith("Working file: "): - filename = line.split(None, 2)[2].strip() - prev = line - continue - m = email_rx.search(line) - if m: - words = prev.split() + line[:m.start()].split() - L = [] - while words \ - and (words[-1][0].isupper() or words[-1] in MAGIC_WORDS): - L.insert(0, words.pop()) - name = " ".join(L) - email = m.group(1).lower() - acks.add(email, name, filename) - prev = line - - -def load_cvs_log_acks(acks, args): - repolist = cvsinfo.get_repository_list(args or [""]) - for info, paths in repolist: - print >>sys.stderr, "Repository:", info.get_cvsroot() - f = open_cvs_log(info, paths) - find_acks(f, acks) - f.close() - - -def load_tex_source_acks(acks, args): - for path in args: - path = path or os.curdir - if os.path.isfile(path): - read_acks_from_tex_file(acks, path) - else: - read_acks_from_tex_dir(acks, path) - - -def read_acks_from_tex_file(acks, path): - f = open(path) - while 1: - line = f.readline() - if not line: - break - if line.startswith(r"\sectionauthor{"): - line = line[len(r"\sectionauthor"):] - name, line = extract_tex_group(line) 
- email, line = extract_tex_group(line) - acks.add(email, name, path) - - -def read_acks_from_tex_dir(acks, path): - stack = [path] - while stack: - p = stack.pop() - for n in os.listdir(p): - n = os.path.join(p, n) - if os.path.isdir(n): - stack.insert(0, n) - elif os.path.normpath(n).endswith(".tex"): - read_acks_from_tex_file(acks, n) - - -def extract_tex_group(s): - c = 0 - for i in range(len(s)): - if s[i] == '{': - c += 1 - elif s[i] == '}': - c -= 1 - if c == 0: - return s[1:i], s[i+1:] - - -def print_acks(acks): - first = 1 - for email, D in acks.items(): - if first: - first = 0 - else: - print - L = D.items() - L.sort() - prefname = L[0][1] - for file, name in L[1:]: - if name != prefname: - prefname = "" - break - if prefname: - print prefname, "<%s>:" % email - else: - print email + ":" - for file, name in L: - if name == prefname: - print " " + file - else: - print " %s (as %s)" % (file, name) - - -def print_ack_names(acks): - names = [] - for email, D in acks.items(): - L = D.items() - L.sort() - prefname = L[0][1] - for file, name in L[1:]: - prefname = prefname or name - names.append(prefname or email) - def f(s1, s2): - s1 = s1.lower() - s2 = s2.lower() - return cmp((s1.split()[-1], s1), - (s2.split()[-1], s2)) - names.sort(f) - for name in names: - print name - - -def main(): - args = sys.argv[1:] - acks = Acknowledgements() - load_cvs_log_acks(acks, args) - load_tex_source_acks(acks, args) - print_ack_names(acks) - - -if __name__ == "__main__": - main() Modified: python/branches/release24-maint/Doc/tools/mksourcepkg ============================================================================== --- python/branches/release24-maint/Doc/tools/mksourcepkg (original) +++ python/branches/release24-maint/Doc/tools/mksourcepkg Mon Jan 2 08:25:08 2006 @@ -24,8 +24,6 @@ import sys import tempfile -import cvsinfo - try: __file__ except NameError: @@ -79,42 +77,22 @@ else: formats = ["gzip"] release = args[0] - cvstag = None + svntag = None if len(args) > 1: - 
cvstag = args[1] + svntag = args[1] tempdir = tempfile.mktemp() os.mkdir(tempdir) pkgdir = os.path.join(tempdir, "Python-Docs-" + release) - os.mkdir(pkgdir) pwd = os.getcwd() mydir = os.path.abspath(os.path.dirname(sys.argv[0])) - info = cvsinfo.RepositoryInfo(mydir) - cvsroot = info.get_cvsroot() - m = rx.match(cvsroot) - if m and anonymous: - # If this is an authenticated SourceForge repository, convert to - # anonymous usage for the export/checkout, since that avoids the - # SSH overhead. - group = m.group(1) - cvsroot = ":pserver:anonymous at cvs.%s.sourceforge.net:/cvsroot/%s" \ - % (group, group) - # For some reason, SourceForge/CVS doesn't seem to care that we - # might not have done a "cvs login" to the anonymous server. - # That avoids a lot of painful gunk here. os.chdir(tempdir) if not quiet: - print "--- current directory is:", pkgdir - if cvstag: - run("cvs -d%s export -r %s -d Python-Docs-%s python/dist/src/Doc" - % (cvsroot, cvstag, release)) - else: - run("cvs -Q -d%s checkout -d Python-Docs-%s python/dist/src/Doc" - % (cvsroot, release)) - # remove CVS directories - for p in ('*/CVS', '*/*/CVS', '*/*/*/CVS'): - map(shutil.rmtree, glob.glob(p)) - for f in ('.cvsignore', '*/.cvsignore'): - map(os.unlink, glob.glob(f)) + print "--- current directory is:", tempdir + if not svntag: + svntag = "trunk" + svnbase = "http://svn.python.org/projects/python" + run("svn export %s/%s/Doc Python-Docs-%s" + % (svnbase, svntag, release)) # Copy in the version informtation, if we're not just going to # rip it back out: From python-checkins at python.org Mon Jan 2 16:18:46 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 16:18:46 +0100 (CET) Subject: [Python-checkins] commit of r41868 - in python/branches/ssize_t: Doc/Makefile.deps Doc/api/init.tex Doc/api/refcounts.dat Doc/lib/archiving.tex Doc/lib/custominterp.tex Doc/lib/datatypes.tex Doc/lib/development.tex Doc/lib/fileformats.tex Doc/lib/filesys.tex Doc/lib/frameworks.tex 
Doc/lib/i18n.tex Doc/lib/ipc.tex Doc/lib/language.tex Doc/lib/lib.tex Doc/lib/libbsddb.tex Doc/lib/libcookielib.tex Doc/lib/libcrypto.tex Doc/lib/libcsv.tex Doc/lib/liblogging.tex Doc/lib/libmd5.tex Doc/lib/liboperator.tex Doc/lib/libos.tex Doc/lib/libpickle.tex Doc/lib/libpprint.tex Doc/lib/libprofile.tex Doc/lib/libsocket.tex Doc/lib/libstdtypes.tex Doc/lib/libsubprocess.tex Doc/lib/libsys.tex Doc/lib/libtypes.tex Doc/lib/liburllib2.tex Doc/lib/libweakref.tex Doc/lib/modules.tex Doc/lib/numeric.tex Doc/lib/persistence.tex Doc/ref/ref3.tex Doc/ref/ref5.tex Doc/ref/ref7.tex Doc/tools Doc/tools/cvsinfo.py Doc/tools/findacks Doc/tools/mksourcepkg Grammar/Grammar Include/Python-ast.h Include/Python.h Include/asdl.h Include/ast.h Include/compile.h Include/pyarena.h Include/pythonrun.h Include/structmember.h Lib/SimpleXMLRPCServer.py Lib/_LWPCookieJar.py Lib/_MozillaCookieJar.py Lib/codecs.py Lib/cookielib.py Lib/csv.py Lib/encodings/cp037.py Lib/encodings/cp1006.py Lib/encodings/cp1026.py Lib/encodings/cp1140.py Lib/encodings/cp1250.py Lib/encodings/cp1251.py Lib/encodings/cp1252.py Lib/encodings/cp1253.py Lib/encodings/cp1254.py Lib/encodings/cp1255.py Lib/encodings/cp1256.py Lib/encodings/cp1257.py Lib/encodings/cp1258.py Lib/encodings/cp424.py Lib/encodings/cp437.py Lib/encodings/cp500.py Lib/encodings/cp737.py Lib/encodings/cp775.py Lib/encodings/cp850.py Lib/encodings/cp852.py Lib/encodings/cp855.py Lib/encodings/cp856.py Lib/encodings/cp857.py Lib/encodings/cp860.py Lib/encodings/cp861.py Lib/encodings/cp862.py Lib/encodings/cp863.py Lib/encodings/cp864.py Lib/encodings/cp865.py Lib/encodings/cp866.py Lib/encodings/cp869.py Lib/encodings/cp874.py Lib/encodings/cp875.py Lib/encodings/iso8859_1.py Lib/encodings/iso8859_10.py Lib/encodings/iso8859_11.py Lib/encodings/iso8859_13.py Lib/encodings/iso8859_14.py Lib/encodings/iso8859_15.py Lib/encodings/iso8859_16.py Lib/encodings/iso8859_2.py Lib/encodings/iso8859_3.py Lib/encodings/iso8859_4.py 
Lib/encodings/iso8859_5.py Lib/encodings/iso8859_6.py Lib/encodings/iso8859_7.py Lib/encodings/iso8859_8.py Lib/encodings/iso8859_9.py Lib/encodings/koi8_r.py Lib/encodings/koi8_u.py Lib/encodings/mac_arabic.py Lib/encodings/mac_centeuro.py Lib/encodings/mac_croatian.py Lib/encodings/mac_cyrillic.py Lib/encodings/mac_farsi.py Lib/encodings/mac_greek.py Lib/encodings/mac_iceland.py Lib/encodings/mac_roman.py Lib/encodings/mac_romanian.py Lib/encodings/mac_turkish.py Lib/encodings/tis_620.py Lib/macurl2path.py Lib/nturl2path.py Lib/plat-riscos/rourl2path.py Lib/platform.py Lib/test/bad_coding2.py Lib/test/test__locale.py Lib/test/test_builtin.py Lib/test/test_code.py Lib/test/test_coding.py Lib/test/test_cookielib.py Lib/test/test_csv.py Lib/test/test_descr.py Lib/test/test_dis.py Lib/test/test_exception_variations.py Lib/test/test_generators.py Lib/test/test_locale.py Lib/test/test_logging.py Lib/test/test_minidom.py Lib/test/test_mmap.py Lib/test/test_operator.py Lib/test/test_poll.py Lib/test/test_quopri.py Lib/test/test_xmlrpc.py Lib/urllib.py Lib/webbrowser.py Lib/xmlcore/etree/ElementInclude.py Lib/xmlcore/etree/ElementPath.py Mac Makefile.pre.in Misc/ACKS Misc/NEWS Modules/Setup.dist Modules/_elementtree.c Modules/_hotshot.c Modules/_tkinter.c Modules/getbuildinfo.c Modules/mmapmodule.c Modules/operator.c Modules/posixmodule.c Modules/pyexpat.c Modules/socketmodule.c Modules/stropmodule.c Objects/abstract.c Objects/bufferobject.c Objects/dictobject.c Objects/typeobject.c PC/_subprocess.c PC/_winreg.c PCbuild/pcbuild.sln PCbuild/pythoncore.vcproj Parser/asdl.py Parser/asdl_c.py Parser/grammar.c Parser/spark.py Parser/tokenizer.c Python/Python-ast.c Python/asdl.c Python/ast.c Python/compile.c Python/dynload_aix.c Python/getargs.c Python/graminit.c Python/import.c Python/modsupport.c Python/pyarena.c Python/pystrtod.c Python/pythonrun.c Python/structmember.c Python/symtable.c Python/sysmodule.c Tools/bgen/bgen/bgenGenerator.py Tools/bgen/bgen/bgenHeapBuffer.py 
Tools/compiler/dumppyc.py Tools/pynche/StripViewer.py Tools/unicode/gencodec.py configure configure.in setup.py Message-ID: <20060102151846.BF8621E4002@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 16:17:17 2006 New Revision: 41868 Added: python/branches/ssize_t/Doc/lib/archiving.tex - copied unchanged from r41867, python/trunk/Doc/lib/archiving.tex python/branches/ssize_t/Doc/lib/custominterp.tex - copied unchanged from r41867, python/trunk/Doc/lib/custominterp.tex python/branches/ssize_t/Doc/lib/datatypes.tex - copied unchanged from r41867, python/trunk/Doc/lib/datatypes.tex python/branches/ssize_t/Doc/lib/development.tex - copied unchanged from r41867, python/trunk/Doc/lib/development.tex python/branches/ssize_t/Doc/lib/fileformats.tex - copied unchanged from r41867, python/trunk/Doc/lib/fileformats.tex python/branches/ssize_t/Doc/lib/filesys.tex - copied unchanged from r41867, python/trunk/Doc/lib/filesys.tex python/branches/ssize_t/Doc/lib/frameworks.tex - copied unchanged from r41867, python/trunk/Doc/lib/frameworks.tex python/branches/ssize_t/Doc/lib/i18n.tex - copied unchanged from r41867, python/trunk/Doc/lib/i18n.tex python/branches/ssize_t/Doc/lib/ipc.tex - copied unchanged from r41867, python/trunk/Doc/lib/ipc.tex python/branches/ssize_t/Doc/lib/modules.tex - copied unchanged from r41867, python/trunk/Doc/lib/modules.tex python/branches/ssize_t/Doc/lib/numeric.tex - copied unchanged from r41867, python/trunk/Doc/lib/numeric.tex python/branches/ssize_t/Doc/lib/persistence.tex - copied unchanged from r41867, python/trunk/Doc/lib/persistence.tex python/branches/ssize_t/Include/pyarena.h - copied unchanged from r41867, python/trunk/Include/pyarena.h python/branches/ssize_t/Lib/test/bad_coding2.py - copied unchanged from r41867, python/trunk/Lib/test/bad_coding2.py python/branches/ssize_t/Lib/test/test_exception_variations.py - copied unchanged from r41867, python/trunk/Lib/test/test_exception_variations.py python/branches/ssize_t/Python/pyarena.c 
- copied unchanged from r41867, python/trunk/Python/pyarena.c Removed: python/branches/ssize_t/Doc/tools/cvsinfo.py python/branches/ssize_t/Doc/tools/findacks Modified: python/branches/ssize_t/ (props changed) python/branches/ssize_t/Doc/Makefile.deps python/branches/ssize_t/Doc/api/init.tex python/branches/ssize_t/Doc/api/refcounts.dat python/branches/ssize_t/Doc/lib/language.tex python/branches/ssize_t/Doc/lib/lib.tex python/branches/ssize_t/Doc/lib/libbsddb.tex python/branches/ssize_t/Doc/lib/libcookielib.tex python/branches/ssize_t/Doc/lib/libcrypto.tex python/branches/ssize_t/Doc/lib/libcsv.tex python/branches/ssize_t/Doc/lib/liblogging.tex python/branches/ssize_t/Doc/lib/libmd5.tex python/branches/ssize_t/Doc/lib/liboperator.tex python/branches/ssize_t/Doc/lib/libos.tex python/branches/ssize_t/Doc/lib/libpickle.tex python/branches/ssize_t/Doc/lib/libpprint.tex python/branches/ssize_t/Doc/lib/libprofile.tex python/branches/ssize_t/Doc/lib/libsocket.tex python/branches/ssize_t/Doc/lib/libstdtypes.tex python/branches/ssize_t/Doc/lib/libsubprocess.tex python/branches/ssize_t/Doc/lib/libsys.tex python/branches/ssize_t/Doc/lib/libtypes.tex python/branches/ssize_t/Doc/lib/liburllib2.tex python/branches/ssize_t/Doc/lib/libweakref.tex python/branches/ssize_t/Doc/ref/ref3.tex python/branches/ssize_t/Doc/ref/ref5.tex python/branches/ssize_t/Doc/ref/ref7.tex python/branches/ssize_t/Doc/tools/ (props changed) python/branches/ssize_t/Doc/tools/mksourcepkg python/branches/ssize_t/Grammar/Grammar python/branches/ssize_t/Include/Python-ast.h python/branches/ssize_t/Include/Python.h python/branches/ssize_t/Include/asdl.h python/branches/ssize_t/Include/ast.h python/branches/ssize_t/Include/compile.h python/branches/ssize_t/Include/pythonrun.h python/branches/ssize_t/Include/structmember.h python/branches/ssize_t/Lib/SimpleXMLRPCServer.py python/branches/ssize_t/Lib/_LWPCookieJar.py python/branches/ssize_t/Lib/_MozillaCookieJar.py python/branches/ssize_t/Lib/codecs.py 
python/branches/ssize_t/Lib/cookielib.py python/branches/ssize_t/Lib/csv.py python/branches/ssize_t/Lib/encodings/cp037.py python/branches/ssize_t/Lib/encodings/cp1006.py python/branches/ssize_t/Lib/encodings/cp1026.py python/branches/ssize_t/Lib/encodings/cp1140.py python/branches/ssize_t/Lib/encodings/cp1250.py python/branches/ssize_t/Lib/encodings/cp1251.py python/branches/ssize_t/Lib/encodings/cp1252.py python/branches/ssize_t/Lib/encodings/cp1253.py python/branches/ssize_t/Lib/encodings/cp1254.py python/branches/ssize_t/Lib/encodings/cp1255.py python/branches/ssize_t/Lib/encodings/cp1256.py python/branches/ssize_t/Lib/encodings/cp1257.py python/branches/ssize_t/Lib/encodings/cp1258.py python/branches/ssize_t/Lib/encodings/cp424.py python/branches/ssize_t/Lib/encodings/cp437.py python/branches/ssize_t/Lib/encodings/cp500.py python/branches/ssize_t/Lib/encodings/cp737.py python/branches/ssize_t/Lib/encodings/cp775.py python/branches/ssize_t/Lib/encodings/cp850.py python/branches/ssize_t/Lib/encodings/cp852.py python/branches/ssize_t/Lib/encodings/cp855.py python/branches/ssize_t/Lib/encodings/cp856.py python/branches/ssize_t/Lib/encodings/cp857.py python/branches/ssize_t/Lib/encodings/cp860.py python/branches/ssize_t/Lib/encodings/cp861.py python/branches/ssize_t/Lib/encodings/cp862.py python/branches/ssize_t/Lib/encodings/cp863.py python/branches/ssize_t/Lib/encodings/cp864.py python/branches/ssize_t/Lib/encodings/cp865.py python/branches/ssize_t/Lib/encodings/cp866.py python/branches/ssize_t/Lib/encodings/cp869.py python/branches/ssize_t/Lib/encodings/cp874.py python/branches/ssize_t/Lib/encodings/cp875.py python/branches/ssize_t/Lib/encodings/iso8859_1.py python/branches/ssize_t/Lib/encodings/iso8859_10.py python/branches/ssize_t/Lib/encodings/iso8859_11.py python/branches/ssize_t/Lib/encodings/iso8859_13.py python/branches/ssize_t/Lib/encodings/iso8859_14.py python/branches/ssize_t/Lib/encodings/iso8859_15.py 
python/branches/ssize_t/Lib/encodings/iso8859_16.py python/branches/ssize_t/Lib/encodings/iso8859_2.py python/branches/ssize_t/Lib/encodings/iso8859_3.py python/branches/ssize_t/Lib/encodings/iso8859_4.py python/branches/ssize_t/Lib/encodings/iso8859_5.py python/branches/ssize_t/Lib/encodings/iso8859_6.py python/branches/ssize_t/Lib/encodings/iso8859_7.py python/branches/ssize_t/Lib/encodings/iso8859_8.py python/branches/ssize_t/Lib/encodings/iso8859_9.py python/branches/ssize_t/Lib/encodings/koi8_r.py python/branches/ssize_t/Lib/encodings/koi8_u.py python/branches/ssize_t/Lib/encodings/mac_arabic.py python/branches/ssize_t/Lib/encodings/mac_centeuro.py python/branches/ssize_t/Lib/encodings/mac_croatian.py python/branches/ssize_t/Lib/encodings/mac_cyrillic.py python/branches/ssize_t/Lib/encodings/mac_farsi.py python/branches/ssize_t/Lib/encodings/mac_greek.py python/branches/ssize_t/Lib/encodings/mac_iceland.py python/branches/ssize_t/Lib/encodings/mac_roman.py python/branches/ssize_t/Lib/encodings/mac_romanian.py python/branches/ssize_t/Lib/encodings/mac_turkish.py python/branches/ssize_t/Lib/encodings/tis_620.py python/branches/ssize_t/Lib/macurl2path.py python/branches/ssize_t/Lib/nturl2path.py python/branches/ssize_t/Lib/plat-riscos/rourl2path.py python/branches/ssize_t/Lib/platform.py python/branches/ssize_t/Lib/test/test__locale.py python/branches/ssize_t/Lib/test/test_builtin.py python/branches/ssize_t/Lib/test/test_code.py python/branches/ssize_t/Lib/test/test_coding.py python/branches/ssize_t/Lib/test/test_cookielib.py python/branches/ssize_t/Lib/test/test_csv.py python/branches/ssize_t/Lib/test/test_descr.py python/branches/ssize_t/Lib/test/test_dis.py python/branches/ssize_t/Lib/test/test_generators.py python/branches/ssize_t/Lib/test/test_locale.py python/branches/ssize_t/Lib/test/test_logging.py python/branches/ssize_t/Lib/test/test_minidom.py python/branches/ssize_t/Lib/test/test_mmap.py python/branches/ssize_t/Lib/test/test_operator.py 
python/branches/ssize_t/Lib/test/test_poll.py python/branches/ssize_t/Lib/test/test_quopri.py python/branches/ssize_t/Lib/test/test_xmlrpc.py python/branches/ssize_t/Lib/urllib.py python/branches/ssize_t/Lib/webbrowser.py python/branches/ssize_t/Lib/xmlcore/etree/ElementInclude.py python/branches/ssize_t/Lib/xmlcore/etree/ElementPath.py python/branches/ssize_t/Mac/ (props changed) python/branches/ssize_t/Makefile.pre.in python/branches/ssize_t/Misc/ACKS python/branches/ssize_t/Misc/NEWS python/branches/ssize_t/Modules/Setup.dist python/branches/ssize_t/Modules/_elementtree.c python/branches/ssize_t/Modules/_hotshot.c python/branches/ssize_t/Modules/_tkinter.c python/branches/ssize_t/Modules/getbuildinfo.c python/branches/ssize_t/Modules/mmapmodule.c python/branches/ssize_t/Modules/operator.c python/branches/ssize_t/Modules/posixmodule.c python/branches/ssize_t/Modules/pyexpat.c python/branches/ssize_t/Modules/socketmodule.c python/branches/ssize_t/Modules/stropmodule.c python/branches/ssize_t/Objects/abstract.c python/branches/ssize_t/Objects/bufferobject.c python/branches/ssize_t/Objects/dictobject.c python/branches/ssize_t/Objects/typeobject.c python/branches/ssize_t/PC/_subprocess.c python/branches/ssize_t/PC/_winreg.c python/branches/ssize_t/PCbuild/pcbuild.sln python/branches/ssize_t/PCbuild/pythoncore.vcproj python/branches/ssize_t/Parser/asdl.py python/branches/ssize_t/Parser/asdl_c.py python/branches/ssize_t/Parser/grammar.c python/branches/ssize_t/Parser/spark.py python/branches/ssize_t/Parser/tokenizer.c python/branches/ssize_t/Python/Python-ast.c python/branches/ssize_t/Python/asdl.c python/branches/ssize_t/Python/ast.c python/branches/ssize_t/Python/compile.c python/branches/ssize_t/Python/dynload_aix.c python/branches/ssize_t/Python/getargs.c python/branches/ssize_t/Python/graminit.c python/branches/ssize_t/Python/import.c python/branches/ssize_t/Python/modsupport.c python/branches/ssize_t/Python/pystrtod.c python/branches/ssize_t/Python/pythonrun.c 
python/branches/ssize_t/Python/structmember.c python/branches/ssize_t/Python/symtable.c python/branches/ssize_t/Python/sysmodule.c python/branches/ssize_t/Tools/bgen/bgen/bgenGenerator.py python/branches/ssize_t/Tools/bgen/bgen/bgenHeapBuffer.py python/branches/ssize_t/Tools/compiler/dumppyc.py python/branches/ssize_t/Tools/pynche/StripViewer.py python/branches/ssize_t/Tools/unicode/gencodec.py python/branches/ssize_t/configure python/branches/ssize_t/configure.in python/branches/ssize_t/setup.py Log: Merge with trunk:41867 Modified: python/branches/ssize_t/Doc/Makefile.deps ============================================================================== --- python/branches/ssize_t/Doc/Makefile.deps (original) +++ python/branches/ssize_t/Doc/Makefile.deps Mon Jan 2 16:17:17 2006 @@ -271,6 +271,7 @@ lib/libgettext.tex \ lib/libbasehttp.tex \ lib/libcookie.tex \ + lib/libcookielib.tex \ lib/libcopyreg.tex \ lib/libsymbol.tex \ lib/libbinhex.tex \ Modified: python/branches/ssize_t/Doc/api/init.tex ============================================================================== --- python/branches/ssize_t/Doc/api/init.tex (original) +++ python/branches/ssize_t/Doc/api/init.tex Mon Jan 2 16:17:17 2006 @@ -272,6 +272,13 @@ \withsubitem{(in module sys)}{\ttindex{version}} \end{cfuncdesc} +\begin{cfuncdesc}{const char*}{Py_GetBuildNumber}{} + Return a string representing the Subversion revision that this Python + executable was built from. This number is a string because it may contain a + trailing 'M' if Python was built from a mixed revision source tree. + \versionadded{2.5} +\end{cfuncdesc} + \begin{cfuncdesc}{const char*}{Py_GetPlatform}{} Return the platform identifier for the current platform. 
On \UNIX, this is formed from the ``official'' name of the operating system, Modified: python/branches/ssize_t/Doc/api/refcounts.dat ============================================================================== --- python/branches/ssize_t/Doc/api/refcounts.dat (original) +++ python/branches/ssize_t/Doc/api/refcounts.dat Mon Jan 2 16:17:17 2006 @@ -751,6 +751,11 @@ PyObject_AsFileDescriptor:int::: PyObject_AsFileDescriptor:PyObject*:o:0: +PyObject_Call:PyObject*::+1: +PyObject_Call:PyObject*:callable_object:0: +PyObject_Call:PyObject*:args:0: +PyObject_Call:PyObject*:kw:0: + PyObject_CallFunction:PyObject*::+1: PyObject_CallFunction:PyObject*:callable_object:0: PyObject_CallFunction:char*:format:: Modified: python/branches/ssize_t/Doc/lib/language.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/language.tex (original) +++ python/branches/ssize_t/Doc/lib/language.tex Mon Jan 2 16:17:17 2006 @@ -2,7 +2,7 @@ \label{language}} Python provides a number of modules to assist in working with the -Python language. These module support tokenizing, parsing, syntax +Python language. These modules support tokenizing, parsing, syntax analysis, bytecode disassembly, and various other facilities. These modules include: Modified: python/branches/ssize_t/Doc/lib/lib.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/lib.tex (original) +++ python/branches/ssize_t/Doc/lib/lib.tex Mon Jan 2 16:17:17 2006 @@ -12,7 +12,7 @@ % .idx file \makemodindex % ... and the module index as well. 
- + \begin{document} \maketitle @@ -66,129 +66,192 @@ \input{libintro} % Introduction + +% ============= +% BUILT-INs +% ============= + \input{libobjs} % Built-in Types, Exceptions and Functions \input{libfuncs} \input{libstdtypes} \input{libexcs} \input{libconsts} -\input{libpython} % Python Runtime Services -\input{libsys} -\input{libgc} -\input{libweakref} -\input{libfpectl} -\input{libatexit} -\input{libtypes} -\input{libuserdict} -\input{liboperator} -\input{libinspect} -\input{libtraceback} -\input{liblinecache} -\input{libpickle} -\input{libcopyreg} % really copy_reg -\input{libshelve} -\input{libcopy} -\input{libmarshal} -\input{libwarnings} -\input{libimp} -\input{libzipimport} -\input{libpkgutil} -\input{libmodulefinder} -\input{libcode} -\input{libcodeop} -\input{libpprint} -\input{librepr} -\input{libnew} -\input{libsite} -\input{libuser} -\input{libbltin} % really __builtin__ -\input{libmain} % really __main__ -\input{libfuture} % really __future__ + +% ============= +% BASIC/GENERAL-PURPOSE OBJECTS +% ============= + +% Strings \input{libstrings} % String Services \input{libstring} \input{libre} \input{libreconvert} -\input{libstruct} +\input{libstruct} % XXX also/better in File Formats? 
\input{libdifflib} -\input{libfpformat} \input{libstringio} \input{libtextwrap} \input{libcodecs} \input{libunicodedata} \input{libstringprep} +\input{libfpformat} -\input{libmisc} % Miscellaneous Services -\input{libpydoc} -\input{libdoctest} -\input{libunittest} -\input{libtest} -\input{libdecimal} -\input{libmath} -\input{libcmath} -\input{librandom} -\input{libbisect} + +\input{datatypes} % Data types and structures +\input{libdatetime} +\input{libcalendar} \input{libcollections} \input{libheapq} +\input{libbisect} \input{libarray} \input{libsets} +\input{libsched} +\input{libmutex} +\input{libqueue} +\input{libweakref} +\input{libuserdict} + +% General object services +% XXX intro +\input{libtypes} +\input{libnew} +\input{libcopy} +\input{libpprint} +\input{librepr} + + +\input{numeric} % Numeric/Mathematical modules +\input{libmath} +\input{libcmath} +\input{libdecimal} +\input{librandom} + +% Functions, Functional, Generators and Iterators +% XXX intro functional \input{libitertools} \input{libfunctional} +\input{liboperator} % from runtime - better with itertools and functional + + +% ============= +% DATA FORMATS +% ============= + +% Big move - include all the markup and internet formats here + +% MIME & email stuff +\input{netdata} % Internet Data Handling +\input{email} +\input{libmailcap} +\input{libmailbox} +\input{libmhlib} +\input{libmimetools} +\input{libmimetypes} +\input{libmimewriter} +\input{libmimify} +\input{libmultifile} +\input{librfc822} + +% encoding stuff +\input{libbase64} +\input{libbinascii} +\input{libbinhex} +\input{libquopri} +\input{libuu} + +\input{markup} % Structured Markup Processing Tools +\input{libhtmlparser} +\input{libsgmllib} +\input{libhtmllib} +\input{libpyexpat} +\input{xmldom} +\input{xmldomminidom} +\input{xmldompulldom} +\input{xmlsax} +\input{xmlsaxhandler} +\input{xmlsaxutils} +\input{xmlsaxreader} +% \input{libxmllib} + +\input{fileformats} % Miscellaneous file formats +\input{libcsv} \input{libcfgparser} 
-\input{libfileinput} -\input{libcalendar} -\input{libcmd} -\input{libshlex} +\input{librobotparser} +\input{libnetrc} +\input{libxdrlib} -\input{liballos} % Generic Operating System Services -\input{libos} +\input{libcrypto} % Cryptographic Services +\input{libhashlib} +\input{libhmac} +\input{libmd5} +\input{libsha} + +% ============= +% FILE & DATABASE STORAGE +% ============= + +\input{filesys} % File/directory support \input{libposixpath} % os.path -\input{libdircache} +\input{libfileinput} \input{libstat} \input{libstatvfs} \input{libfilecmp} -\input{libsubprocess} -\input{libpopen2} -\input{libdatetime} -\input{libtime} -\input{libsched} -\input{libmutex} -\input{libgetpass} -\input{libcurses} -\input{libascii} % curses.ascii -\input{libcursespanel} -\input{libgetopt} -\input{liboptparse} \input{libtempfile} -\input{liberrno} \input{libglob} \input{libfnmatch} +\input{liblinecache} \input{libshutil} -\input{liblocale} -\input{libgettext} +\input{libdircache} + + +\input{archiving} % Data compression and archiving +\input{libzlib} +\input{libgzip} +\input{libbz2} +\input{libzipfile} +\input{libtarfile} + + +\input{persistence} % Persistent storage +\input{libpickle} +\input{libcopyreg} % really copy_reg % from runtime... 
+\input{libshelve} +\input{libmarshal} +\input{libanydbm} +\input{libwhichdb} +\input{libdbm} +\input{libgdbm} +\input{libdbhash} +\input{libbsddb} +\input{libdumbdbm} + + +% ============= +% OS +% ============= + + +\input{liballos} % Generic Operating System Services +\input{libos} +\input{libtime} +\input{liboptparse} +\input{libgetopt} \input{liblogging} +\input{libgetpass} +\input{libcurses} +\input{libascii} % curses.ascii +\input{libcursespanel} \input{libplatform} +\input{liberrno} \input{libsomeos} % Optional Operating System Services -\input{libsignal} -\input{libsocket} \input{libselect} \input{libthread} \input{libthreading} \input{libdummythread} \input{libdummythreading} -\input{libqueue} \input{libmmap} -\input{libanydbm} -\input{libdbhash} -\input{libwhichdb} -\input{libbsddb} -\input{libdumbdbm} -\input{libzlib} -\input{libgzip} -\input{libbz2} -\input{libzipfile} -\input{libtarfile} \input{libreadline} \input{librlcompleter} @@ -199,8 +262,6 @@ \input{libgrp} \input{libcrypt} \input{libdl} -\input{libdbm} -\input{libgdbm} \input{libtermios} \input{libtty} \input{libpty} @@ -212,11 +273,18 @@ \input{libsyslog} \input{libcommands} -\input{libpdb} % The Python Debugger -\input{libprofile} % The Python Profiler -\input{libhotshot} % New profiler -\input{libtimeit} +% ============= +% NETWORK & COMMUNICATIONS +% ============= + +\input{ipc} % Interprocess communication/networking +\input{libsubprocess} +\input{libsocket} +\input{libsignal} +\input{libpopen2} +\input{libasyncore} +\input{libasynchat} \input{internet} % Internet Protocols \input{libwebbrowser} @@ -243,51 +311,10 @@ \input{libxmlrpclib} \input{libsimplexmlrpc} \input{libdocxmlrpc} -\input{libasyncore} -\input{libasynchat} -\input{netdata} % Internet Data Handling -\input{libformatter} - -% MIME & email stuff -\input{email} -\input{libmailcap} -\input{libmailbox} -\input{libmhlib} -\input{libmimetools} -\input{libmimetypes} -\input{libmimewriter} -\input{libmimify} -\input{libmultifile} 
-\input{librfc822} - -% encoding stuff -\input{libbase64} -\input{libbinascii} -\input{libbinhex} -\input{libquopri} -\input{libuu} -\input{libxdrlib} - -% file formats -\input{libnetrc} -\input{librobotparser} -\input{libcsv} - -\input{markup} % Structured Markup Processing Tools -\input{libhtmlparser} -\input{libsgmllib} -\input{libhtmllib} -\input{libpyexpat} -\input{xmldom} -\input{xmldomminidom} -\input{xmldompulldom} -\input{xmlsax} -\input{xmlsaxhandler} -\input{xmlsaxutils} -\input{xmlsaxreader} -\input{xmletree} -% \input{libxmllib} +% ============= +% MULTIMEDIA +% ============= \input{libmm} % Multimedia Services \input{libaudioop} @@ -302,18 +329,78 @@ \input{libsndhdr} \input{libossaudiodev} -\input{libcrypto} % Cryptographic Services -\input{libhmac} -\input{libhashlib} -\input{libmd5} -\input{libsha} - +% Tkinter is a chapter in its own right. \input{tkinter} +% % Internationalization +\input{i18n} +\input{libgettext} +\input{liblocale} + +% ============= +% PROGRAM FRAMEWORKS +% ============= +\input{frameworks} +\input{libcmd} +\input{libshlex} + + +% ============= +% DEVELOPMENT TOOLS +% ============= +% % Software development support +\input{development} +\input{libpydoc} +\input{libdoctest} +\input{libunittest} +\input{libtest} + +\input{libpdb} % The Python Debugger + +\input{libprofile} % The Python Profiler +\input{libhotshot} % New profiler +\input{libtimeit} + + +% ============= +% PYTHON ENGINE +% ============= + +% Runtime services +\input{libpython} % Python Runtime Services +\input{libsys} +\input{libbltin} % really __builtin__ +\input{libmain} % really __main__ +\input{libwarnings} +\input{libatexit} +\input{libtraceback} +\input{libfuture} % really __future__ +\input{libgc} +\input{libinspect} +\input{libsite} +\input{libuser} +\input{libfpectl} + + +\input{custominterp} % Custom interpreter +\input{libcode} +\input{libcodeop} \input{librestricted} % Restricted Execution \input{librexec} \input{libbastion} + +\input{modules} % 
Importing Modules +\input{libimp} +\input{libzipimport} +\input{libpkgutil} +\input{libmodulefinder} + + +% ============= +% PYTHON LANGUAGE & COMPILER +% ============= + \input{language} % Python Language Services \input{libparser} \input{libsymbol} @@ -330,6 +417,13 @@ \input{compiler} % compiler package +\input{libmisc} % Miscellaneous Services +\input{libformatter} + +% ============= +% OTHER PLATFORM-SPECIFIC STUFF +% ============= + %\input{libamoeba} % AMOEBA ONLY %\input{libstdwin} % STDWIN ONLY Modified: python/branches/ssize_t/Doc/lib/libbsddb.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libbsddb.tex (original) +++ python/branches/ssize_t/Doc/lib/libbsddb.tex Mon Jan 2 16:17:17 2006 @@ -12,7 +12,8 @@ using the appropriate open call. Bsddb objects behave generally like dictionaries. Keys and values must be strings, however, so to use other objects as keys or to store other kinds of objects the user must -serialize them somehow, typically using marshal.dumps or pickle.dumps. +serialize them somehow, typically using \function{marshal.dumps()} or +\function{pickle.dumps}. Starting with Python 2.3 the \module{bsddb} module requires the Berkeley DB library version 3.2 or later (it is known to work with 3.2 Modified: python/branches/ssize_t/Doc/lib/libcookielib.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libcookielib.tex (original) +++ python/branches/ssize_t/Doc/lib/libcookielib.tex Mon Jan 2 16:17:17 2006 @@ -18,17 +18,18 @@ Both the regular Netscape cookie protocol and the protocol defined by \rfc{2965} are handled. RFC 2965 handling is switched off by default. \rfc{2109} cookies are parsed as Netscape cookies and subsequently -treated as RFC 2965 cookies. Note that the great majority of cookies -on the Internet are Netscape cookies. 
\module{cookielib} attempts to -follow the de-facto Netscape cookie protocol (which differs -substantially from that set out in the original Netscape -specification), including taking note of the \code{max-age} and -\code{port} cookie-attributes introduced with RFC 2109. \note{The -various named parameters found in \mailheader{Set-Cookie} and -\mailheader{Set-Cookie2} headers (eg. \code{domain} and -\code{expires}) are conventionally referred to as \dfn{attributes}. -To distinguish them from Python attributes, the documentation for this -module uses the term \dfn{cookie-attribute} instead}. +treated either as Netscape or RFC 2965 cookies according to the +'policy' in effect. Note that the great majority of cookies on the +Internet are Netscape cookies. \module{cookielib} attempts to follow +the de-facto Netscape cookie protocol (which differs substantially +from that set out in the original Netscape specification), including +taking note of the \code{max-age} and \code{port} cookie-attributes +introduced with RFC 2109. \note{The various named parameters found in +\mailheader{Set-Cookie} and \mailheader{Set-Cookie2} headers +(eg. \code{domain} and \code{expires}) are conventionally referred to +as \dfn{attributes}. To distinguish them from Python attributes, the +documentation for this module uses the term \dfn{cookie-attribute} +instead}. The module defines the following exception: @@ -74,6 +75,7 @@ blocked_domains=\constant{None}, allowed_domains=\constant{None}, netscape=\constant{True}, rfc2965=\constant{False}, + rfc2109_as_netscape=\constant{None}, hide_cookie2=\constant{False}, strict_domain=\constant{False}, strict_rfc2965_unverifiable=\constant{True}, @@ -92,10 +94,14 @@ objects. \class{DefaultCookiePolicy} implements the standard accept / reject -rules for Netscape and RFC 2965 cookies. RFC 2109 cookies +rules for Netscape and RFC 2965 cookies. By default, RFC 2109 cookies (ie. 
cookies received in a \mailheader{Set-Cookie} header with a version cookie-attribute of 1) are treated according to the RFC 2965 -rules. \class{DefaultCookiePolicy} also provides some parameters to +rules. However, if RFC 2965 handling is turned off or +\member{rfc2109_as_netscape} is True, RFC 2109 cookies are +'downgraded' by the \class{CookieJar} instance to Netscape cookies, by +setting the \member{version} attribute of the \class{Cookie} instance +to 0. \class{DefaultCookiePolicy} also provides some parameters to allow some fine-tuning of policy. \end{classdesc} @@ -493,6 +499,17 @@ which are all initialised from the constructor arguments of the same name, and which may all be assigned to. +\begin{memberdesc}{rfc2109_as_netscape} +If true, request that the \class{CookieJar} instance downgrade RFC +2109 cookies (ie. cookies received in a \mailheader{Set-Cookie} header +with a version cookie-attribute of 1) to Netscape cookies by setting +the version attribute of the \class{Cookie} instance to 0. The +default value is \constant{None}, in which case RFC 2109 cookies are +downgraded if and only if RFC 2965 handling is turned off. Therefore, +RFC 2109 cookies are downgraded by default. +\versionadded{2.5} +\end{memberdesc} + General strictness switches: \begin{memberdesc}{strict_domain} @@ -567,9 +584,10 @@ \class{Cookie} instances have Python attributes roughly corresponding to the standard cookie-attributes specified in the various cookie standards. The correspondence is not one-to-one, because there are -complicated rules for assigning default values, and because the +complicated rules for assigning default values, because the \code{max-age} and \code{expires} cookie-attributes contain equivalent -information. +information, and because RFC 2109 cookies may be 'downgraded' by +\module{cookielib} from version 1 to version 0 (Netscape) cookies. Assignment to these attributes should not be necessary other than in rare circumstances in a \class{CookiePolicy} method. 
The class does @@ -577,8 +595,10 @@ doing if you do that. \begin{memberdesc}[Cookie]{version} -Integer or \constant{None}. Netscape cookies have version 0. RFC -2965 and RFC 2109 cookies have version 1. +Integer or \constant{None}. Netscape cookies have \member{version} 0. +RFC 2965 and RFC 2109 cookies have a \code{version} cookie-attribute +of 1. However, note that \module{cookielib} may 'downgrade' RFC 2109 +cookies to Netscape cookies, in which case \member{version} is 0. \end{memberdesc} \begin{memberdesc}[Cookie]{name} Cookie name (a string). @@ -611,6 +631,14 @@ URL linking to a comment from the server explaining the function of this cookie, or \constant{None}. \end{memberdesc} +\begin{memberdesc}[Cookie]{rfc2109} +True if this cookie was received as an RFC 2109 cookie (ie. the cookie +arrived in a \mailheader{Set-Cookie} header, and the value of the +Version cookie-attribute in that header was 1). This attribute is +provided because \module{cookielib} may 'downgrade' RFC 2109 cookies +to Netscape cookies, in which case \member{version} is 0. +\versionadded{2.5} +\end{memberdesc} \begin{memberdesc}[Cookie]{port_specified} True if a port or set of ports was explicitly specified by the server Modified: python/branches/ssize_t/Doc/lib/libcrypto.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libcrypto.tex (original) +++ python/branches/ssize_t/Doc/lib/libcrypto.tex Mon Jan 2 16:17:17 2006 @@ -9,15 +9,11 @@ \localmoduletable Hardcore cypherpunks will probably find the cryptographic modules -written by A.M. Kuchling of further interest; the package adds -built-in modules for DES and IDEA encryption, provides a Python module -for reading and decrypting PGP files, and then some. These modules +written by A.M. Kuchling of further interest; the package contains +modules for various encryption algorithms, most notably AES. These modules are not distributed with Python but available separately. 
See the URL \url{http://www.amk.ca/python/code/crypto.html} for more information. -\index{PGP} -\index{Pretty Good Privacy} -\indexii{DES}{cipher} -\indexii{IDEA}{cipher} +\indexii{AES}{algorithm} \index{cryptography} \index{Kuchling, Andrew} Modified: python/branches/ssize_t/Doc/lib/libcsv.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libcsv.tex (original) +++ python/branches/ssize_t/Doc/lib/libcsv.tex Mon Jan 2 16:17:17 2006 @@ -196,7 +196,7 @@ The \class{Sniffer} class is used to deduce the format of a CSV file. \end{classdesc} -The \class{Sniffer} class provides a single method: +The \class{Sniffer} class provides two methods: \begin{methoddesc}{sniff}{sample\optional{,delimiters=None}} Analyze the given \var{sample} and return a \class{Dialect} subclass Modified: python/branches/ssize_t/Doc/lib/liblogging.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/liblogging.tex (original) +++ python/branches/ssize_t/Doc/lib/liblogging.tex Mon Jan 2 16:17:17 2006 @@ -59,7 +59,7 @@ logging output. Logging messages are encoded as instances of the \class{LogRecord} class. -When a logger decides to actually log an event, an \class{LogRecord} +When a logger decides to actually log an event, a \class{LogRecord} instance is created from the logging message. Logging messages are subjected to a dispatch mechanism through the Modified: python/branches/ssize_t/Doc/lib/libmd5.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libmd5.tex (original) +++ python/branches/ssize_t/Doc/lib/libmd5.tex Mon Jan 2 16:17:17 2006 @@ -44,7 +44,7 @@ \code{16}. \end{datadesc} -md5 objects support the following methods: +The md5 module provides the following functions: \begin{funcdesc}{new}{\optional{arg}} Return a new md5 object. 
If \var{arg} is present, the method call Modified: python/branches/ssize_t/Doc/lib/liboperator.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/liboperator.tex (original) +++ python/branches/ssize_t/Doc/lib/liboperator.tex Mon Jan 2 16:17:17 2006 @@ -162,7 +162,7 @@ \begin{funcdesc}{truediv}{a, b} \funcline{__truediv__}{a, b} Return \var{a} \code{/} \var{b} when \code{__future__.division} is in -effect. This is also known as division. +effect. This is also known as ``true'' division. \versionadded{2.2} \end{funcdesc} @@ -237,6 +237,108 @@ \end{funcdesc} +Many operations have an ``in-place'' version. The following functions +provide a more primitive access to in-place operators than the usual +syntax does; for example, the statement \code{x += y} is equivalent to +\code{x = operator.iadd(x, y)}. Another way to put it is to say that +\code{z = operator.iadd(x, y)} is equivalent to the compound statement +\code{z = x; z += y}. + +\begin{funcdesc}{iadd}{a, b} +\funcline{__iadd__}{a, b} +\code{a = iadd(a, b)} is equivalent to \code{a += b}. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{iand}{a, b} +\funcline{__iand__}{a, b} +\code{a = iand(a, b)} is equivalent to \code{a \&= b}. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{iconcat}{a, b} +\funcline{__iconcat__}{a, b} +\code{a = iconcat(a, b)} is equivalent to \code{a += b} for \var{a} +and \var{b} sequences. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{idiv}{a, b} +\funcline{__idiv__}{a, b} +\code{a = idiv(a, b)} is equivalent to \code{a /= b} when +\code{__future__.division} is not in effect. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{ifloordiv}{a, b} +\funcline{__ifloordiv__}{a, b} +\code{a = ifloordiv(a, b)} is equivalent to \code{a //= b}. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{ilshift}{a, b} +\funcline{__ilshift__}{a, b} +\code{a = ilshift(a, b)} is equivalent to \code{a <}\code{<= b}. 
+\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{imod}{a, b} +\funcline{__imod__}{a, b} +\code{a = imod(a, b)} is equivalent to \code{a \%= b}. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{imul}{a, b} +\funcline{__imul__}{a, b} +\code{a = imul(a, b)} is equivalent to \code{a *= b}. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{ior}{a, b} +\funcline{__ior__}{a, b} +\code{a = ior(a, b)} is equivalent to \code{a |= b}. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{ipow}{a, b} +\funcline{__ipow__}{a, b} +\code{a = ipow(a, b)} is equivalent to \code{a **= b}. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{irepeat}{a, b} +\funcline{__irepeat__}{a, b} +\code{a = irepeat(a, b)} is equivalent to \code{a *= b} where +\var{a} is a sequence and \var{b} is an integer. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{irshift}{a, b} +\funcline{__irshift__}{a, b} +\code{a = irshift(a, b)} is equivalent to \code{a >}\code{>= b}. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{isub}{a, b} +\funcline{__isub__}{a, b} +\code{a = isub(a, b)} is equivalent to \code{a -= b}. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{itruediv}{a, b} +\funcline{__itruediv__}{a, b} +\code{a = itruediv(a, b)} is equivalent to \code{a /= b} when +\code{__future__.division} is in effect. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{ixor}{a, b} +\funcline{__ixor__}{a, b} +\code{a = ixor(a, b)} is equivalent to \code{a \textasciicircum= b}. +\versionadded{2.5} +\end{funcdesc} + + The \module{operator} module also defines a few predicates to test the type of objects. 
\note{Be careful not to misinterpret the results of these functions; only \function{isCallable()} has any Modified: python/branches/ssize_t/Doc/lib/libos.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libos.tex (original) +++ python/branches/ssize_t/Doc/lib/libos.tex Mon Jan 2 16:17:17 2006 @@ -875,7 +875,10 @@ but makes all intermediate-level directories needed to contain the leaf directory. Throws an \exception{error} exception if the leaf directory already exists or cannot be created. The default \var{mode} -is \code{0777} (octal). +is \code{0777} (octal). On some systems, \var{mode} is ignored. +Where it is used, the current umask value is first masked out. +\note{\function{makedirs()} will become confused if the path elements +to create include \var{os.pardir}.} \versionadded{1.5.2} \versionchanged[This function now handles UNC paths correctly]{2.3} \end{funcdesc} @@ -930,11 +933,15 @@ \index{directory!deleting} Removes directories recursively. Works like \function{rmdir()} except that, if the leaf directory is -successfully removed, directories corresponding to rightmost path -segments will be pruned way until either the whole path is consumed or -an error is raised (which is ignored, because it generally means that -a parent directory is not empty). Throws an \exception{error} -exception if the leaf directory could not be successfully removed. +successfully removed, \function{removedirs()} +tries to successively remove every parent directory mentioned in +\var{path} until an error is raised (which is ignored, because +it generally means that a parent directory is not empty). +For example, \samp{os.removedirs('foo/bar/baz')} will first remove +the directory \samp{'foo/bar/baz'}, and then remove \samp{'foo/bar'} +and \samp{'foo'} if they are empty. +Raises \exception{OSError} if the leaf directory could not be +successfully removed. 
\versionadded{1.5.2} \end{funcdesc} Modified: python/branches/ssize_t/Doc/lib/libpickle.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libpickle.tex (original) +++ python/branches/ssize_t/Doc/lib/libpickle.tex Mon Jan 2 16:17:17 2006 @@ -372,6 +372,10 @@ Attempts to pickle unpicklable objects will raise the \exception{PicklingError} exception; when this happens, an unspecified number of bytes may have already been written to the underlying file. +Trying to pickle a highly recursive data structure may exceed the +maximum recursion depth, a \exception{RuntimeError} will be raised +in this case. You can carefully raise this limit with +\function{sys.setrecursionlimit()}. Note that functions (built-in and user-defined) are pickled by ``fully qualified'' name reference, not by value. This means that only the Modified: python/branches/ssize_t/Doc/lib/libpprint.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libpprint.tex (original) +++ python/branches/ssize_t/Doc/lib/libpprint.tex Mon Jan 2 16:17:17 2006 @@ -156,7 +156,7 @@ \begin{methoddesc}{pformat}{object} Return the formatted representation of \var{object}. This takes into -Account the options passed to the \class{PrettyPrinter} constructor. +account the options passed to the \class{PrettyPrinter} constructor. \end{methoddesc} \begin{methoddesc}{pprint}{object} @@ -197,10 +197,10 @@ presented which is already represented in \var{context}, the third return value should be true. Recursive calls to the \method{format()} method should add additional entries for containers to this -dictionary. The fourth argument, \var{maxlevels}, gives the requested +dictionary. The third argument, \var{maxlevels}, gives the requested limit to recursion; this will be \code{0} if there is no requested limit. This argument should be passed unmodified to recursive calls. 
-The fourth argument, \var{level} gives the current level; recursive +The fourth argument, \var{level}, gives the current level; recursive calls should be passed a value less than that of the current call. \versionadded{2.3} \end{methoddesc} Modified: python/branches/ssize_t/Doc/lib/libprofile.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libprofile.tex (original) +++ python/branches/ssize_t/Doc/lib/libprofile.tex Mon Jan 2 16:17:17 2006 @@ -55,47 +55,47 @@ \index{profiling, deterministic} -\section{How Is This Profiler Different From The Old Profiler?} -\nodename{Profiler Changes} - -(This section is of historical importance only; the old profiler -discussed here was last seen in Python 1.1.) - -The big changes from old profiling module are that you get more -information, and you pay less CPU time. It's not a trade-off, it's a -trade-up. - -To be specific: - -\begin{description} - -\item[Bugs removed:] -Local stack frame is no longer molested, execution time is now charged -to correct functions. - -\item[Accuracy increased:] -Profiler execution time is no longer charged to user's code, -calibration for platform is supported, file reads are not done \emph{by} -profiler \emph{during} profiling (and charged to user's code!). - -\item[Speed increased:] -Overhead CPU cost was reduced by more than a factor of two (perhaps a -factor of five), lightweight profiler module is all that must be -loaded, and the report generating module (\module{pstats}) is not needed -during profiling. - -\item[Recursive functions support:] -Cumulative times in recursive functions are correctly calculated; -recursive entries are counted. 
- -\item[Large growth in report generating UI:] -Distinct profiles runs can be added together forming a comprehensive -report; functions that import statistics take arbitrary lists of -files; sorting criteria is now based on keywords (instead of 4 integer -options); reports shows what functions were profiled as well as what -profile file was referenced; output format has been improved. - -\end{description} +%\section{How Is This Profiler Different From The Old Profiler?} +%\nodename{Profiler Changes} +% +%(This section is of historical importance only; the old profiler +%discussed here was last seen in Python 1.1.) +% +%The big changes from old profiling module are that you get more +%information, and you pay less CPU time. It's not a trade-off, it's a +%trade-up. +% +%To be specific: +% +%\begin{description} +% +%\item[Bugs removed:] +%Local stack frame is no longer molested, execution time is now charged +%to correct functions. +% +%\item[Accuracy increased:] +%Profiler execution time is no longer charged to user's code, +%calibration for platform is supported, file reads are not done \emph{by} +%profiler \emph{during} profiling (and charged to user's code!). +% +%\item[Speed increased:] +%Overhead CPU cost was reduced by more than a factor of two (perhaps a +%factor of five), lightweight profiler module is all that must be +%loaded, and the report generating module (\module{pstats}) is not needed +%during profiling. +% +%\item[Recursive functions support:] +%Cumulative times in recursive functions are correctly calculated; +%recursive entries are counted. +% +%\item[Large growth in report generating UI:] +%Distinct profiles runs can be added together forming a comprehensive +%report; functions that import statistics take arbitrary lists of +%files; sorting criteria is now based on keywords (instead of 4 integer +%options); reports shows what functions were profiled as well as what +%profile file was referenced; output format has been improved. 
+% +%\end{description} \section{Instant Users Manual \label{profile-instant}} Modified: python/branches/ssize_t/Doc/lib/libsocket.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libsocket.tex (original) +++ python/branches/ssize_t/Doc/lib/libsocket.tex Mon Jan 2 16:17:17 2006 @@ -557,6 +557,8 @@ at once is specified by \var{bufsize}. See the \UNIX{} manual page \manpage{recv}{2} for the meaning of the optional argument \var{flags}; it defaults to zero. +\note{For best match with hardware and network realities, the value of +\var{bufsize} should be a relatively small power of 2, for example, 4096.} \end{methoddesc} \begin{methoddesc}[socket]{recvfrom}{bufsize\optional{, flags}} Modified: python/branches/ssize_t/Doc/lib/libstdtypes.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libstdtypes.tex (original) +++ python/branches/ssize_t/Doc/lib/libstdtypes.tex Mon Jan 2 16:17:17 2006 @@ -952,8 +952,8 @@ precede the conversion (overrides a "space" flag).} \end{tableii} -The length modifier may be \code{h}, \code{l}, and \code{L} may be -present, but are ignored as they are not necessary for Python. +A length modifier (\code{h}, \code{l}, or \code{L}) may be +present, but is ignored as it is not necessary for Python. 
The conversion types are: @@ -1261,17 +1261,17 @@ \begin{tableiii}{c|c|l}{code}{Operation}{Equivalent}{Result} \lineiii{\var{s}.update(\var{t})} {\var{s} |= \var{t}} - {return set \var{s} with elements added from \var{t}} + {update set \var{s}, adding elements from \var{t}} \lineiii{\var{s}.intersection_update(\var{t})} {\var{s} \&= \var{t}} - {return set \var{s} keeping only elements also found in \var{t}} + {update set \var{s}, keeping only elements found in both \var{s} and \var{t}} \lineiii{\var{s}.difference_update(\var{t})} {\var{s} -= \var{t}} - {return set \var{s} after removing elements found in \var{t}} + {update set \var{s}, removing elements found in \var{t}} \lineiii{\var{s}.symmetric_difference_update(\var{t})} {\var{s} \textasciicircum= \var{t}} - {return set \var{s} with elements from \var{s} or \var{t} - but not both} + {update set \var{s}, keeping only elements found in either \var{s} or \var{t} + but not in both} \hline \lineiii{\var{s}.add(\var{x})}{} @@ -1606,7 +1606,7 @@ defaults to the current position. The current file position is not changed. Note that if a specified size exceeds the file's current size, the result is platform-dependent: possibilities - include that file may remain unchanged, increase to the specified + include that the file may remain unchanged, increase to the specified size as if zero-filled, or increase to the specified size with undefined new content. Availability: Windows, many \UNIX{} variants. Modified: python/branches/ssize_t/Doc/lib/libsubprocess.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libsubprocess.tex (original) +++ python/branches/ssize_t/Doc/lib/libsubprocess.tex Mon Jan 2 16:17:17 2006 @@ -103,7 +103,7 @@ for the new process. 
If \var{universal_newlines} is \constant{True}, the file objects stdout -and stderr are opened as a text files, but lines may be terminated by +and stderr are opened as text files, but lines may be terminated by any of \code{'\e n'}, the Unix end-of-line convention, \code{'\e r'}, the Macintosh convention or \code{'\e r\e n'}, the Windows convention. All of these external representations are seen as \code{'\e n'} by the Modified: python/branches/ssize_t/Doc/lib/libsys.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libsys.tex (original) +++ python/branches/ssize_t/Doc/lib/libsys.tex Mon Jan 2 16:17:17 2006 @@ -27,6 +27,13 @@ \versionadded{2.0} \end{datadesc} +\begin{datadesc}{build_number} + A string representing the Subversion revision that this Python executable + was built from. This number is a string because it may contain a trailing + 'M' if Python was built from a mixed revision source tree. + \versionadded{2.5} +\end{datadesc} + \begin{datadesc}{builtin_module_names} A tuple of strings giving the names of all modules that are compiled into this Python interpreter. (This information is not available in Modified: python/branches/ssize_t/Doc/lib/libtypes.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libtypes.tex (original) +++ python/branches/ssize_t/Doc/lib/libtypes.tex Mon Jan 2 16:17:17 2006 @@ -8,7 +8,7 @@ This module defines names for some object types that are used by the standard Python interpreter, but not for the types defined by various extension modules. Also, it does not include some of the types that -arise during processing such the \code{listiterator} type. +arise during processing such as the \code{listiterator} type. It is safe to use \samp{from types import *} --- the module does not export any names besides the ones listed here. 
New names exported by future versions of this module will all end in Modified: python/branches/ssize_t/Doc/lib/liburllib2.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/liburllib2.tex (original) +++ python/branches/ssize_t/Doc/lib/liburllib2.tex Mon Jan 2 16:17:17 2006 @@ -782,7 +782,8 @@ \end{verbatim} Here we are sending a data-stream to the stdin of a CGI and reading -the data it returns to us: +the data it returns to us. Note that this example will only work when the +Python installation supports SSL. \begin{verbatim} >>> import urllib2 Modified: python/branches/ssize_t/Doc/lib/libweakref.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libweakref.tex (original) +++ python/branches/ssize_t/Doc/lib/libweakref.tex Mon Jan 2 16:17:17 2006 @@ -75,8 +75,8 @@ retrieved by calling the reference object if the referent is still alive; if the referent is no longer alive, calling the reference object will cause \constant{None} to be returned. If \var{callback} is - provided and not \constant{None}, - it will be called when the object is about to be + provided and not \constant{None}, and the returned weakref object is + still alive, the callback will be called when the object is about to be finalized; the weak reference object will be passed as the only parameter to the callback; the referent will no longer be available. Modified: python/branches/ssize_t/Doc/ref/ref3.tex ============================================================================== --- python/branches/ssize_t/Doc/ref/ref3.tex (original) +++ python/branches/ssize_t/Doc/ref/ref3.tex Mon Jan 2 16:17:17 2006 @@ -24,7 +24,8 @@ object's type, under certain controlled conditions. 
Until this manual undergoes extensive revision, it must now be taken as authoritative only regarding ``classic classes'', that are still the default, for -compatibility purposes, in Python 2.2 and 2.3.} +compatibility purposes, in Python 2.2 and 2.3. For more information, +see \url{http://www.python.org/doc/newstyle.html}.} An object's type determines the operations that the object supports (e.g., ``does it have a length?'') and also defines the possible values for objects of that type. The @@ -1036,14 +1037,39 @@ Classes and instances come in two flavours: old-style or classic, and new-style. -Old-style classes were the only flavour of class available before Python 2.1. While they supported multiple inheritance, the rules for resolving names were chosen for ease of implementation. These rules turn out to make multiple inheritance hard to use in certain situations. +Up to Python 2.1, old-style classes were the only flavour available to the +user. The concept of (old-style) class is unrelated to the concept of type: if +\var{x} is an instance of an old-style class, then \code{x.__class__} +designates the class of \var{x}, but \code{type(x)} is always \code{<type +'instance'>}. This reflects the fact that all old-style instances, +independently of their class, are implemented with a single built-in type, +called \code{instance}. + +New-style classes were introduced in Python 2.2 to unify classes and types. A +new-style class is neither more nor less than a user-defined type. If \var{x} is +an instance of a new-style class, then \code{type(x)} is the same as +\code{x.__class__}. + +The major motivation for introducing new-style classes is to provide a unified +object model with a full meta-model. It also has a number of immediate +benefits, like the ability to subclass most built-in types, or the introduction +of "descriptors", which enable computed properties. + +For compatibility reasons, classes are still old-style by default.
New-style +classes are created by specifying another new-style class (i.e.\ a type) as a +parent class, or the "top-level type" \class{object} if no other parent is +needed. The behaviour of new-style classes differs from that of old-style +classes in a number of important details in addition to what \function{type} +returns. Some of these changes are fundamental to the new object model, like +the way special methods are invoked. Others are "fixes" that could not be +implemented before for compatibility concerns, like the method resolution order +in case of multiple inheritance. -New-style classes were introduced in Python 2.1, and change the method resolution order to make multiple inheritance more usable. +This manual is not up-to-date with respect to new-style classes. For now, +please see \url{http://www.python.org/doc/newstyle.html} for more information. The plan is to eventually drop old-style classes, leaving only the semantics of new-style classes. This change will probably only be feasible in Python 3.0. - - %========================================================================= \section{Special method names\label{specialnames}} @@ -1054,7 +1080,9 @@ classes to define their own behavior with respect to language operators. For instance, if a class defines a method named \method{__getitem__()}, and \code{x} is an instance of -this class, then \code{x[i]} is equivalent to +this class, then \code{x[i]} is equivalent\footnote{This, and other +statements, are only roughly true for instances of new-style +classes.} to \code{x.__getitem__(i)}. Except where mentioned, attempts to execute an operation raise an exception when no appropriate method is defined.
\withsubitem{(mapping object method)}{\ttindex{__getitem__()}} Modified: python/branches/ssize_t/Doc/ref/ref5.tex ============================================================================== --- python/branches/ssize_t/Doc/ref/ref5.tex (original) +++ python/branches/ssize_t/Doc/ref/ref5.tex Mon Jan 2 16:17:17 2006 @@ -777,6 +777,12 @@ * y + x \%{} y} be very close to \code{x}. }. +In addition to performing the modulo operation on numbers, the \code{\%} +operator is also overloaded by string and unicode objects to perform +string formatting (also known as interpolation). The syntax for string +formatting is described in the Python Library Reference, section +``Sequence Types''. + \deprecated{2.3}{The floor division operator, the modulo operator, and the \function{divmod()} function are no longer defined for complex numbers. Instead, convert to a floating point number using the Modified: python/branches/ssize_t/Doc/ref/ref7.tex ============================================================================== --- python/branches/ssize_t/Doc/ref/ref7.tex (original) +++ python/branches/ssize_t/Doc/ref/ref7.tex Mon Jan 2 16:17:17 2006 @@ -195,26 +195,25 @@ code for a group of statements: \begin{productionlist} - \production{try_stmt} - {\token{try_exc_stmt} | \token{try_fin_stmt}} - \production{try_exc_stmt} + \production{try_stmt} {try1_stmt | try2_stmt} + \production{try1_stmt} {"try" ":" \token{suite}} \productioncont{("except" [\token{expression} ["," \token{target}]] ":" \token{suite})+} \productioncont{["else" ":" \token{suite}]} - \production{try_fin_stmt} - {"try" ":" \token{suite} - "finally" ":" \token{suite}} + \productioncont{["finally" ":" \token{suite}]} + \production{try2_stmt} + {"try" ":" \token{suite}} + \productioncont{"finally" ":" \token{suite}} \end{productionlist} -There are two forms of \keyword{try} statement: -\keyword{try}...\keyword{except} and -\keyword{try}...\keyword{finally}. 
These forms cannot be mixed (but -they can be nested in each other). - -The \keyword{try}...\keyword{except} form specifies one or more -exception handlers -(the \keyword{except} clauses). When no exception occurs in the +\versionchanged[In previous versions of Python, +\keyword{try}...\keyword{except}...\keyword{finally} did not work. +\keyword{try}...\keyword{except} had to be nested in +\keyword{try}...\keyword{finally}]{2.5} + +The \keyword{except} clause(s) specify one or more exception handlers. +When no exception occurs in the \keyword{try} clause, no exception handler is executed. When an exception occurs in the \keyword{try} suite, a search for an exception handler is started. This search inspects the except clauses in turn until @@ -232,6 +231,8 @@ If no except clause matches the exception, the search for an exception handler continues in the surrounding code and on the invocation stack. +\footnote{The exception is propagated to the invocation stack only if +there is no \keyword{finally} clause that negates the exception.} If the evaluation of an expression in the header of an except clause raises an exception, the original search for a handler is canceled @@ -277,12 +278,13 @@ \stindex{break} \stindex{continue} -The \keyword{try}...\keyword{finally} form specifies a `cleanup' handler. The -\keyword{try} clause is executed. When no exception occurs, the -\keyword{finally} clause is executed. When an exception occurs in the -\keyword{try} clause, the exception is temporarily saved, the -\keyword{finally} clause is executed, and then the saved exception is -re-raised. If the \keyword{finally} clause raises another exception or +If \keyword{finally} is present, it specifies a `cleanup' handler. The +\keyword{try} clause is executed, including any \keyword{except} and +\keyword{else} clauses. If an exception occurs in any of the clauses +and is not handled, the exception is temporarily saved. The +\keyword{finally} clause is executed.
If there is a saved exception, +it is re-raised at the end of the \keyword{finally} clause. +If the \keyword{finally} clause raises another exception or executes a \keyword{return} or \keyword{break} statement, the saved exception is lost. A \keyword{continue} statement is illegal in the \keyword{finally} clause. (The reason is a problem with the current Deleted: /python/branches/ssize_t/Doc/tools/cvsinfo.py ============================================================================== --- /python/branches/ssize_t/Doc/tools/cvsinfo.py Mon Jan 2 16:17:17 2006 +++ (empty file) @@ -1,81 +0,0 @@ -"""Utility class and function to get information about the CVS repository -based on checked-out files. -""" - -import os - - -def get_repository_list(paths): - d = {} - for name in paths: - if os.path.isfile(name): - dir = os.path.dirname(name) - else: - dir = name - rootfile = os.path.join(name, "CVS", "Root") - root = open(rootfile).readline().strip() - if not d.has_key(root): - d[root] = RepositoryInfo(dir), [name] - else: - d[root][1].append(name) - return d.values() - - -class RepositoryInfo: - """Record holding information about the repository we want to talk to.""" - cvsroot_path = None - branch = None - - # type is '', ':ext', or ':pserver:' - type = "" - - def __init__(self, dir=None): - if dir is None: - dir = os.getcwd() - dir = os.path.join(dir, "CVS") - root = open(os.path.join(dir, "Root")).readline().strip() - if root.startswith(":pserver:"): - self.type = ":pserver:" - root = root[len(":pserver:"):] - elif ":" in root: - if root.startswith(":ext:"): - root = root[len(":ext:"):] - self.type = ":ext:" - self.repository = root - if ":" in root: - host, path = root.split(":", 1) - self.cvsroot_path = path - else: - self.cvsroot_path = root - fn = os.path.join(dir, "Tag") - if os.path.isfile(fn): - self.branch = open(fn).readline().strip()[1:] - - def get_cvsroot(self): - return self.type + self.repository - - _repository_dir_cache = {} - - def 
get_repository_file(self, path): - filename = os.path.abspath(path) - if os.path.isdir(path): - dir = path - join = 0 - else: - dir = os.path.dirname(path) - join = 1 - try: - repodir = self._repository_dir_cache[dir] - except KeyError: - repofn = os.path.join(dir, "CVS", "Repository") - repodir = open(repofn).readline().strip() - repodir = os.path.join(self.cvsroot_path, repodir) - self._repository_dir_cache[dir] = repodir - if join: - fn = os.path.join(repodir, os.path.basename(path)) - else: - fn = repodir - return fn[len(self.cvsroot_path)+1:] - - def __repr__(self): - return "" % self.get_cvsroot() Deleted: /python/branches/ssize_t/Doc/tools/findacks ============================================================================== --- /python/branches/ssize_t/Doc/tools/findacks Mon Jan 2 16:17:17 2006 +++ (empty file) @@ -1,161 +0,0 @@ -#!/usr/bin/env python -"""Script to locate email addresses in the CVS logs.""" -__version__ = '$Revision$' - -import os -import re -import sys -import UserDict - -import cvsinfo - - -class Acknowledgements(UserDict.UserDict): - def add(self, email, name, path): - d = self.data - d.setdefault(email, {})[path] = name - - -def open_cvs_log(info, paths=None): - cvsroot = info.get_cvsroot() - cmd = "cvs -q -d%s log " % cvsroot - if paths: - cmd += " ".join(paths) - return os.popen(cmd, "r") - - -email_rx = re.compile("<([a-z][-a-z0-9._]*@[-a-z0-9.]+)>", re.IGNORECASE) - -def find_acks(f, acks): - prev = '' - filename = None - MAGIC_WORDS = ('van', 'von') - while 1: - line = f.readline() - if not line: - break - if line.startswith("Working file: "): - filename = line.split(None, 2)[2].strip() - prev = line - continue - m = email_rx.search(line) - if m: - words = prev.split() + line[:m.start()].split() - L = [] - while words \ - and (words[-1][0].isupper() or words[-1] in MAGIC_WORDS): - L.insert(0, words.pop()) - name = " ".join(L) - email = m.group(1).lower() - acks.add(email, name, filename) - prev = line - - -def 
load_cvs_log_acks(acks, args): - repolist = cvsinfo.get_repository_list(args or [""]) - for info, paths in repolist: - print >>sys.stderr, "Repository:", info.get_cvsroot() - f = open_cvs_log(info, paths) - find_acks(f, acks) - f.close() - - -def load_tex_source_acks(acks, args): - for path in args: - path = path or os.curdir - if os.path.isfile(path): - read_acks_from_tex_file(acks, path) - else: - read_acks_from_tex_dir(acks, path) - - -def read_acks_from_tex_file(acks, path): - f = open(path) - while 1: - line = f.readline() - if not line: - break - if line.startswith(r"\sectionauthor{"): - line = line[len(r"\sectionauthor"):] - name, line = extract_tex_group(line) - email, line = extract_tex_group(line) - acks.add(email, name, path) - - -def read_acks_from_tex_dir(acks, path): - stack = [path] - while stack: - p = stack.pop() - for n in os.listdir(p): - n = os.path.join(p, n) - if os.path.isdir(n): - stack.insert(0, n) - elif os.path.normpath(n).endswith(".tex"): - read_acks_from_tex_file(acks, n) - - -def extract_tex_group(s): - c = 0 - for i in range(len(s)): - if s[i] == '{': - c += 1 - elif s[i] == '}': - c -= 1 - if c == 0: - return s[1:i], s[i+1:] - - -def print_acks(acks): - first = 1 - for email, D in acks.items(): - if first: - first = 0 - else: - print - L = D.items() - L.sort() - prefname = L[0][1] - for file, name in L[1:]: - if name != prefname: - prefname = "" - break - if prefname: - print prefname, "<%s>:" % email - else: - print email + ":" - for file, name in L: - if name == prefname: - print " " + file - else: - print " %s (as %s)" % (file, name) - - -def print_ack_names(acks): - names = [] - for email, D in acks.items(): - L = D.items() - L.sort() - prefname = L[0][1] - for file, name in L[1:]: - prefname = prefname or name - names.append(prefname or email) - def f(s1, s2): - s1 = s1.lower() - s2 = s2.lower() - return cmp((s1.split()[-1], s1), - (s2.split()[-1], s2)) - names.sort(f) - for name in names: - print name - - -def main(): - args = 
sys.argv[1:] - acks = Acknowledgements() - load_cvs_log_acks(acks, args) - load_tex_source_acks(acks, args) - print_ack_names(acks) - - -if __name__ == "__main__": - main() Modified: python/branches/ssize_t/Doc/tools/mksourcepkg ============================================================================== --- python/branches/ssize_t/Doc/tools/mksourcepkg (original) +++ python/branches/ssize_t/Doc/tools/mksourcepkg Mon Jan 2 16:17:17 2006 @@ -24,8 +24,6 @@ import sys import tempfile -import cvsinfo - try: __file__ except NameError: @@ -79,42 +77,22 @@ else: formats = ["gzip"] release = args[0] - cvstag = None + svntag = None if len(args) > 1: - cvstag = args[1] + svntag = args[1] tempdir = tempfile.mktemp() os.mkdir(tempdir) pkgdir = os.path.join(tempdir, "Python-Docs-" + release) - os.mkdir(pkgdir) pwd = os.getcwd() mydir = os.path.abspath(os.path.dirname(sys.argv[0])) - info = cvsinfo.RepositoryInfo(mydir) - cvsroot = info.get_cvsroot() - m = rx.match(cvsroot) - if m and anonymous: - # If this is an authenticated SourceForge repository, convert to - # anonymous usage for the export/checkout, since that avoids the - # SSH overhead. - group = m.group(1) - cvsroot = ":pserver:anonymous at cvs.%s.sourceforge.net:/cvsroot/%s" \ - % (group, group) - # For some reason, SourceForge/CVS doesn't seem to care that we - # might not have done a "cvs login" to the anonymous server. - # That avoids a lot of painful gunk here. 
os.chdir(tempdir) if not quiet: - print "--- current directory is:", pkgdir - if cvstag: - run("cvs -d%s export -r %s -d Python-Docs-%s python/dist/src/Doc" - % (cvsroot, cvstag, release)) - else: - run("cvs -Q -d%s checkout -d Python-Docs-%s python/dist/src/Doc" - % (cvsroot, release)) - # remove CVS directories - for p in ('*/CVS', '*/*/CVS', '*/*/*/CVS'): - map(shutil.rmtree, glob.glob(p)) - for f in ('.cvsignore', '*/.cvsignore'): - map(os.unlink, glob.glob(f)) + print "--- current directory is:", tempdir + if not svntag: + svntag = "trunk" + svnbase = "http://svn.python.org/projects/python" + run("svn export %s/%s/Doc Python-Docs-%s" + % (svnbase, svntag, release)) # Copy in the version informtation, if we're not just going to # rip it back out: Modified: python/branches/ssize_t/Grammar/Grammar ============================================================================== --- python/branches/ssize_t/Grammar/Grammar (original) +++ python/branches/ssize_t/Grammar/Grammar Mon Jan 2 16:17:17 2006 @@ -32,17 +32,23 @@ decorators: decorator+ funcdef: [decorators] 'def' NAME parameters ':' suite parameters: '(' [varargslist] ')' -varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [','] +varargslist: ((fpdef ['=' test] ',')* + ('*' NAME [',' '**' NAME] | '**' NAME) | + fpdef ['=' test] (',' fpdef ['=' test])* [',']) fpdef: NAME | '(' fplist ')' fplist: fpdef (',' fpdef)* [','] stmt: simple_stmt | compound_stmt simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | exec_stmt | assert_stmt -expr_stmt: testlist (augassign (yield_expr|testlist) | ('=' (yield_expr|testlist))*) -augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//=' +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) 
+expr_stmt: testlist (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist))*) +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') # For normal assignments, additional restrictions enforced by the interpreter -print_stmt: 'print' ( [ test (',' test)* [','] ] | '>>' test [ (',' test)+ [','] ] ) +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) del_stmt: 'del' exprlist pass_stmt: 'pass' flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt @@ -53,7 +59,8 @@ raise_stmt: 'raise' [test [',' test [',' test]]] import_stmt: import_name | import_from import_name: 'import' dotted_as_names -import_from: 'from' dotted_name 'import' ('*' | '(' import_as_names ')' | import_as_names) +import_from: ('from' ('.')* dotted_name + 'import' ('*' | '(' import_as_names ')' | import_as_names)) import_as_name: NAME [NAME NAME] dotted_as_name: dotted_name [NAME NAME] import_as_names: import_as_name (',' import_as_name)* [','] @@ -67,8 +74,11 @@ if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] while_stmt: 'while' test ':' suite ['else' ':' suite] for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite (except_clause ':' suite)+ #diagram:break - ['else' ':' suite] | 'try' ':' suite 'finally' ':' suite) +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) # NB compile.c makes sure that the default except clause is last except_clause: 'except' [test [',' test]] suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT @@ -86,7 +96,11 @@ term: factor (('*'|'/'|'%'|'//') factor)* factor: ('+'|'-'|'~') factor | power power: atom trailer* ['**' factor] -atom: '(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}' | '`' testlist1 '`' | NAME | NUMBER | STRING+ +atom: ('(' [yield_expr|testlist_gexp] ')' | + '[' [listmaker] ']' | + 
'{' [dictmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | STRING+) listmaker: test ( list_for | (',' test)* [','] ) testlist_gexp: test ( gen_for | (',' test)* [','] ) lambdef: 'lambda' [varargslist] ':' test @@ -118,4 +132,3 @@ encoding_decl: NAME yield_expr: 'yield' [testlist] - Modified: python/branches/ssize_t/Include/Python-ast.h ============================================================================== --- python/branches/ssize_t/Include/Python-ast.h (original) +++ python/branches/ssize_t/Include/Python-ast.h Mon Jan 2 16:17:17 2006 @@ -328,81 +328,79 @@ }; -mod_ty Module(asdl_seq * body); -mod_ty Interactive(asdl_seq * body); -mod_ty Expression(expr_ty body); -mod_ty Suite(asdl_seq * body); +mod_ty Module(asdl_seq * body, PyArena *arena); +mod_ty Interactive(asdl_seq * body, PyArena *arena); +mod_ty Expression(expr_ty body, PyArena *arena); +mod_ty Suite(asdl_seq * body, PyArena *arena); stmt_ty FunctionDef(identifier name, arguments_ty args, asdl_seq * body, - asdl_seq * decorators, int lineno); + asdl_seq * decorators, int lineno, PyArena *arena); stmt_ty ClassDef(identifier name, asdl_seq * bases, asdl_seq * body, int - lineno); -stmt_ty Return(expr_ty value, int lineno); -stmt_ty Delete(asdl_seq * targets, int lineno); -stmt_ty Assign(asdl_seq * targets, expr_ty value, int lineno); -stmt_ty AugAssign(expr_ty target, operator_ty op, expr_ty value, int lineno); -stmt_ty Print(expr_ty dest, asdl_seq * values, bool nl, int lineno); + lineno, PyArena *arena); +stmt_ty Return(expr_ty value, int lineno, PyArena *arena); +stmt_ty Delete(asdl_seq * targets, int lineno, PyArena *arena); +stmt_ty Assign(asdl_seq * targets, expr_ty value, int lineno, PyArena *arena); +stmt_ty AugAssign(expr_ty target, operator_ty op, expr_ty value, int lineno, + PyArena *arena); +stmt_ty Print(expr_ty dest, asdl_seq * values, bool nl, int lineno, PyArena + *arena); stmt_ty For(expr_ty target, expr_ty iter, asdl_seq * body, asdl_seq * orelse, - int lineno); -stmt_ty 
While(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno); -stmt_ty If(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno); -stmt_ty Raise(expr_ty type, expr_ty inst, expr_ty tback, int lineno); + int lineno, PyArena *arena); +stmt_ty While(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno, + PyArena *arena); +stmt_ty If(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno, + PyArena *arena); +stmt_ty Raise(expr_ty type, expr_ty inst, expr_ty tback, int lineno, PyArena + *arena); stmt_ty TryExcept(asdl_seq * body, asdl_seq * handlers, asdl_seq * orelse, int - lineno); -stmt_ty TryFinally(asdl_seq * body, asdl_seq * finalbody, int lineno); -stmt_ty Assert(expr_ty test, expr_ty msg, int lineno); -stmt_ty Import(asdl_seq * names, int lineno); -stmt_ty ImportFrom(identifier module, asdl_seq * names, int lineno); -stmt_ty Exec(expr_ty body, expr_ty globals, expr_ty locals, int lineno); -stmt_ty Global(asdl_seq * names, int lineno); -stmt_ty Expr(expr_ty value, int lineno); -stmt_ty Pass(int lineno); -stmt_ty Break(int lineno); -stmt_ty Continue(int lineno); -expr_ty BoolOp(boolop_ty op, asdl_seq * values, int lineno); -expr_ty BinOp(expr_ty left, operator_ty op, expr_ty right, int lineno); -expr_ty UnaryOp(unaryop_ty op, expr_ty operand, int lineno); -expr_ty Lambda(arguments_ty args, expr_ty body, int lineno); -expr_ty Dict(asdl_seq * keys, asdl_seq * values, int lineno); -expr_ty ListComp(expr_ty elt, asdl_seq * generators, int lineno); -expr_ty GeneratorExp(expr_ty elt, asdl_seq * generators, int lineno); -expr_ty Yield(expr_ty value, int lineno); + lineno, PyArena *arena); +stmt_ty TryFinally(asdl_seq * body, asdl_seq * finalbody, int lineno, PyArena + *arena); +stmt_ty Assert(expr_ty test, expr_ty msg, int lineno, PyArena *arena); +stmt_ty Import(asdl_seq * names, int lineno, PyArena *arena); +stmt_ty ImportFrom(identifier module, asdl_seq * names, int lineno, PyArena + *arena); +stmt_ty Exec(expr_ty body, expr_ty globals, 
expr_ty locals, int lineno, PyArena + *arena); +stmt_ty Global(asdl_seq * names, int lineno, PyArena *arena); +stmt_ty Expr(expr_ty value, int lineno, PyArena *arena); +stmt_ty Pass(int lineno, PyArena *arena); +stmt_ty Break(int lineno, PyArena *arena); +stmt_ty Continue(int lineno, PyArena *arena); +expr_ty BoolOp(boolop_ty op, asdl_seq * values, int lineno, PyArena *arena); +expr_ty BinOp(expr_ty left, operator_ty op, expr_ty right, int lineno, PyArena + *arena); +expr_ty UnaryOp(unaryop_ty op, expr_ty operand, int lineno, PyArena *arena); +expr_ty Lambda(arguments_ty args, expr_ty body, int lineno, PyArena *arena); +expr_ty Dict(asdl_seq * keys, asdl_seq * values, int lineno, PyArena *arena); +expr_ty ListComp(expr_ty elt, asdl_seq * generators, int lineno, PyArena + *arena); +expr_ty GeneratorExp(expr_ty elt, asdl_seq * generators, int lineno, PyArena + *arena); +expr_ty Yield(expr_ty value, int lineno, PyArena *arena); expr_ty Compare(expr_ty left, asdl_seq * ops, asdl_seq * comparators, int - lineno); + lineno, PyArena *arena); expr_ty Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, expr_ty - starargs, expr_ty kwargs, int lineno); -expr_ty Repr(expr_ty value, int lineno); -expr_ty Num(object n, int lineno); -expr_ty Str(string s, int lineno); + starargs, expr_ty kwargs, int lineno, PyArena *arena); +expr_ty Repr(expr_ty value, int lineno, PyArena *arena); +expr_ty Num(object n, int lineno, PyArena *arena); +expr_ty Str(string s, int lineno, PyArena *arena); expr_ty Attribute(expr_ty value, identifier attr, expr_context_ty ctx, int - lineno); + lineno, PyArena *arena); expr_ty Subscript(expr_ty value, slice_ty slice, expr_context_ty ctx, int - lineno); -expr_ty Name(identifier id, expr_context_ty ctx, int lineno); -expr_ty List(asdl_seq * elts, expr_context_ty ctx, int lineno); -expr_ty Tuple(asdl_seq * elts, expr_context_ty ctx, int lineno); -slice_ty Ellipsis(void); -slice_ty Slice(expr_ty lower, expr_ty upper, expr_ty step); -slice_ty 
ExtSlice(asdl_seq * dims); -slice_ty Index(expr_ty value); -comprehension_ty comprehension(expr_ty target, expr_ty iter, asdl_seq * ifs); -excepthandler_ty excepthandler(expr_ty type, expr_ty name, asdl_seq * body); + lineno, PyArena *arena); +expr_ty Name(identifier id, expr_context_ty ctx, int lineno, PyArena *arena); +expr_ty List(asdl_seq * elts, expr_context_ty ctx, int lineno, PyArena *arena); +expr_ty Tuple(asdl_seq * elts, expr_context_ty ctx, int lineno, PyArena *arena); +slice_ty Ellipsis(PyArena *arena); +slice_ty Slice(expr_ty lower, expr_ty upper, expr_ty step, PyArena *arena); +slice_ty ExtSlice(asdl_seq * dims, PyArena *arena); +slice_ty Index(expr_ty value, PyArena *arena); +comprehension_ty comprehension(expr_ty target, expr_ty iter, asdl_seq * ifs, + PyArena *arena); +excepthandler_ty excepthandler(expr_ty type, expr_ty name, asdl_seq * body, + PyArena *arena); arguments_ty arguments(asdl_seq * args, identifier vararg, identifier kwarg, - asdl_seq * defaults); -keyword_ty keyword(identifier arg, expr_ty value); -alias_ty alias(identifier name, identifier asname); - -void free_mod(mod_ty); -void free_stmt(stmt_ty); -void free_expr(expr_ty); -void free_expr_context(expr_context_ty); -void free_slice(slice_ty); -void free_boolop(boolop_ty); -void free_operator(operator_ty); -void free_unaryop(unaryop_ty); -void free_cmpop(cmpop_ty); -void free_comprehension(comprehension_ty); -void free_excepthandler(excepthandler_ty); -void free_arguments(arguments_ty); -void free_keyword(keyword_ty); -void free_alias(alias_ty); + asdl_seq * defaults, PyArena *arena); +keyword_ty keyword(identifier arg, expr_ty value, PyArena *arena); +alias_ty alias(identifier name, identifier asname, PyArena *arena); Modified: python/branches/ssize_t/Include/Python.h ============================================================================== --- python/branches/ssize_t/Include/Python.h (original) +++ python/branches/ssize_t/Include/Python.h Mon Jan 2 16:17:17 2006 @@ -113,6 
+113,7 @@ #include "pystate.h" +#include "pyarena.h" #include "modsupport.h" #include "pythonrun.h" #include "ceval.h" Modified: python/branches/ssize_t/Include/asdl.h ============================================================================== --- python/branches/ssize_t/Include/asdl.h (original) +++ python/branches/ssize_t/Include/asdl.h Mon Jan 2 16:17:17 2006 @@ -23,7 +23,7 @@ void *elements[1]; } asdl_seq; -asdl_seq *asdl_seq_new(int size); +asdl_seq *asdl_seq_new(int size, PyArena *arena); void asdl_seq_free(asdl_seq *); #ifdef Py_DEBUG Modified: python/branches/ssize_t/Include/ast.h ============================================================================== --- python/branches/ssize_t/Include/ast.h (original) +++ python/branches/ssize_t/Include/ast.h Mon Jan 2 16:17:17 2006 @@ -5,7 +5,7 @@ #endif PyAPI_FUNC(mod_ty) PyAST_FromNode(const node *, PyCompilerFlags *flags, - const char *); + const char *, PyArena *); #ifdef __cplusplus } Modified: python/branches/ssize_t/Include/compile.h ============================================================================== --- python/branches/ssize_t/Include/compile.h (original) +++ python/branches/ssize_t/Include/compile.h Mon Jan 2 16:17:17 2006 @@ -25,7 +25,7 @@ struct _mod; /* Declare the existence of this type */ PyAPI_FUNC(PyCodeObject *) PyAST_Compile(struct _mod *, const char *, - PyCompilerFlags *); + PyCompilerFlags *, PyArena *); PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST(struct _mod *, const char *); #define ERR_LATE_FUTURE \ Modified: python/branches/ssize_t/Include/pythonrun.h ============================================================================== --- python/branches/ssize_t/Include/pythonrun.h (original) +++ python/branches/ssize_t/Include/pythonrun.h Mon Jan 2 16:17:17 2006 @@ -37,10 +37,12 @@ PyAPI_FUNC(int) PyRun_InteractiveLoopFlags(FILE *, const char *, PyCompilerFlags *); PyAPI_FUNC(struct _mod *) PyParser_ASTFromString(const char *, const char *, - int, PyCompilerFlags *flags); + 
int, PyCompilerFlags *flags, + PyArena *); PyAPI_FUNC(struct _mod *) PyParser_ASTFromFile(FILE *, const char *, int, char *, char *, - PyCompilerFlags *, int *); + PyCompilerFlags *, int *, + PyArena *); #define PyParser_SimpleParseString(S, B) \ PyParser_SimpleParseStringFlags(S, B, 0) #define PyParser_SimpleParseFile(FP, S, B) \ @@ -106,6 +108,7 @@ PyAPI_FUNC(const char *) Py_GetCopyright(void); PyAPI_FUNC(const char *) Py_GetCompiler(void); PyAPI_FUNC(const char *) Py_GetBuildInfo(void); +PyAPI_FUNC(const char *) Py_GetBuildNumber(void); /* Internal -- various one-time initializations */ PyAPI_FUNC(PyObject *) _PyBuiltin_Init(void); Modified: python/branches/ssize_t/Include/structmember.h ============================================================================== --- python/branches/ssize_t/Include/structmember.h (original) +++ python/branches/ssize_t/Include/structmember.h Mon Jan 2 16:17:17 2006 @@ -79,11 +79,11 @@ /* Obsolete API, for binary backwards compatibility */ -PyAPI_FUNC(PyObject *) PyMember_Get(char *, struct memberlist *, char *); -PyAPI_FUNC(int) PyMember_Set(char *, struct memberlist *, char *, PyObject *); +PyAPI_FUNC(PyObject *) PyMember_Get(const char *, struct memberlist *, const char *); +PyAPI_FUNC(int) PyMember_Set(char *, struct memberlist *, const char *, PyObject *); /* Current API, use this */ -PyAPI_FUNC(PyObject *) PyMember_GetOne(char *, struct PyMemberDef *); +PyAPI_FUNC(PyObject *) PyMember_GetOne(const char *, struct PyMemberDef *); PyAPI_FUNC(int) PyMember_SetOne(char *, struct PyMemberDef *, PyObject *); Modified: python/branches/ssize_t/Lib/SimpleXMLRPCServer.py ============================================================================== --- python/branches/ssize_t/Lib/SimpleXMLRPCServer.py (original) +++ python/branches/ssize_t/Lib/SimpleXMLRPCServer.py Mon Jan 2 16:17:17 2006 @@ -253,10 +253,10 @@ response = self._dispatch(method, params) # wrap response in a singleton tuple response = (response,) - response = 
xmlrpclib.dumps(response, methodresponse=1, + response = xmlrpclib.dumps(response, methodresponse=1, allow_none=self.allow_none, encoding=self.encoding) except Fault, fault: - response = xmlrpclib.dumps(fault, allow_none=self.allow_none, + response = xmlrpclib.dumps(fault, allow_none=self.allow_none, encoding=self.encoding) except: # report exception back to server @@ -427,8 +427,8 @@ """ try: - # Get arguments by reading body of request. - # We read this in chunks to avoid straining + # Get arguments by reading body of request. + # We read this in chunks to avoid straining # socket.read(); around the 10 or 15Mb mark, some platforms # begin to have problems (bug #792570). max_chunk_size = 10*1024*1024 @@ -490,8 +490,8 @@ SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding) SocketServer.TCPServer.__init__(self, addr, requestHandler) - # [Bug #1222790] If possible, set close-on-exec flag; if a - # method spawns a subprocess, the subprocess shouldn't have + # [Bug #1222790] If possible, set close-on-exec flag; if a + # method spawns a subprocess, the subprocess shouldn't have # the listening socket open. 
if hasattr(fcntl, 'FD_CLOEXEC'): flags = fcntl.fcntl(self.fileno(), fcntl.F_GETFD) Modified: python/branches/ssize_t/Lib/_LWPCookieJar.py ============================================================================== --- python/branches/ssize_t/Lib/_LWPCookieJar.py (original) +++ python/branches/ssize_t/Lib/_LWPCookieJar.py Mon Jan 2 16:17:17 2006 @@ -12,8 +12,8 @@ """ import time, re, logging -from cookielib import (reraise_unmasked_exceptions, FileCookieJar, Cookie, - MISSING_FILENAME_TEXT, join_header_words, split_header_words, +from cookielib import (reraise_unmasked_exceptions, FileCookieJar, LoadError, + Cookie, MISSING_FILENAME_TEXT, join_header_words, split_header_words, iso2time, time2isoz) def lwp_cookie_str(cookie): @@ -93,7 +93,7 @@ magic = f.readline() if not re.search(self.magic_re, magic): msg = "%s does not seem to contain cookies" % filename - raise IOError(msg) + raise LoadError(msg) now = time.time() @@ -161,4 +161,4 @@ self.set_cookie(c) except: reraise_unmasked_exceptions((IOError,)) - raise IOError("invalid Set-Cookie3 format file %s" % filename) + raise LoadError("invalid Set-Cookie3 format file %s" % filename) Modified: python/branches/ssize_t/Lib/_MozillaCookieJar.py ============================================================================== --- python/branches/ssize_t/Lib/_MozillaCookieJar.py (original) +++ python/branches/ssize_t/Lib/_MozillaCookieJar.py Mon Jan 2 16:17:17 2006 @@ -2,8 +2,8 @@ import re, time, logging -from cookielib import (reraise_unmasked_exceptions, FileCookieJar, Cookie, - MISSING_FILENAME_TEXT) +from cookielib import (reraise_unmasked_exceptions, FileCookieJar, LoadError, + Cookie, MISSING_FILENAME_TEXT) class MozillaCookieJar(FileCookieJar): """ @@ -50,7 +50,7 @@ magic = f.readline() if not re.search(self.magic_re, magic): f.close() - raise IOError( + raise LoadError( "%s does not look like a Netscape format cookies file" % filename) @@ -106,8 +106,8 @@ except: reraise_unmasked_exceptions((IOError,)) - raise 
IOError("invalid Netscape format file %s: %s" % - (filename, line)) + raise LoadError("invalid Netscape format file %s: %s" % + (filename, line)) def save(self, filename=None, ignore_discard=False, ignore_expires=False): if filename is None: Modified: python/branches/ssize_t/Lib/codecs.py ============================================================================== --- python/branches/ssize_t/Lib/codecs.py (original) +++ python/branches/ssize_t/Lib/codecs.py Mon Jan 2 16:17:17 2006 @@ -269,7 +269,7 @@ if self.linebuffer: self.charbuffer = "".join(self.linebuffer) self.linebuffer = None - + # read until we get the required number of characters (if available) while True: # can the request can be satisfied from the character buffer? @@ -335,7 +335,7 @@ if not keepends: line = line.splitlines(False)[0] return line - + readsize = size or 72 line = "" # If size is given, we call read() only once Modified: python/branches/ssize_t/Lib/cookielib.py ============================================================================== --- python/branches/ssize_t/Lib/cookielib.py (original) +++ python/branches/ssize_t/Lib/cookielib.py Mon Jan 2 16:17:17 2006 @@ -460,10 +460,7 @@ if lc in known_attrs: k = lc if k == "version": - # This is an RFC 2109 cookie. Will be treated as RFC 2965 - # cookie in rest of code. - # Probably it should be parsed with split_header_words, but - # that's too much hassle. + # This is an RFC 2109 cookie. 
version_set = True if k == "expires": # convert expires date to seconds since epoch @@ -723,7 +720,9 @@ discard, comment, comment_url, - rest): + rest, + rfc2109=False, + ): if version is not None: version = int(version) if expires is not None: expires = int(expires) @@ -750,6 +749,7 @@ self.discard = discard self.comment = comment self.comment_url = comment_url + self.rfc2109 = rfc2109 self._rest = copy.copy(rest) @@ -787,6 +787,7 @@ attr = getattr(self, name) args.append("%s=%s" % (name, repr(attr))) args.append("rest=%s" % repr(self._rest)) + args.append("rfc2109=%s" % repr(self.rfc2109)) return "Cookie(%s)" % ", ".join(args) @@ -836,6 +837,7 @@ def __init__(self, blocked_domains=None, allowed_domains=None, netscape=True, rfc2965=False, + rfc2109_as_netscape=None, hide_cookie2=False, strict_domain=False, strict_rfc2965_unverifiable=True, @@ -847,6 +849,7 @@ """Constructor arguments should be passed as keyword arguments only.""" self.netscape = netscape self.rfc2965 = rfc2965 + self.rfc2109_as_netscape = rfc2109_as_netscape self.hide_cookie2 = hide_cookie2 self.strict_domain = strict_domain self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable @@ -1518,6 +1521,18 @@ if cookie: cookies.append(cookie) return cookies + def _process_rfc2109_cookies(self, cookies): + rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) + if rfc2109_as_ns is None: + rfc2109_as_ns = not self._policy.rfc2965 + for cookie in cookies: + if cookie.version == 1: + cookie.rfc2109 = True + if rfc2109_as_ns: + # treat 2109 cookies as Netscape cookies rather than + # as RFC2965 cookies + cookie.version = 0 + def make_cookies(self, response, request): """Return sequence of Cookie objects extracted from response object.""" # get cookie-attributes for RFC 2965 and Netscape protocols @@ -1543,11 +1558,13 @@ if ns_hdrs and netscape: try: + # RFC 2109 and Netscape cookies ns_cookies = self._cookies_from_attrs_set( parse_ns_headers(ns_hdrs), request) except: 
reraise_unmasked_exceptions() ns_cookies = [] + self._process_rfc2109_cookies(ns_cookies) # Look for Netscape cookies (from Set-Cookie headers) that match # corresponding RFC 2965 cookies (from Set-Cookie2 headers). @@ -1682,7 +1699,8 @@ return "<%s[%s]>" % (self.__class__, ", ".join(r)) -class LoadError(Exception): pass +# derives from IOError for backwards-compatibility with Python 2.4.0 +class LoadError(IOError): pass class FileCookieJar(CookieJar): """CookieJar that can be loaded from and saved to a file.""" Modified: python/branches/ssize_t/Lib/csv.py ============================================================================== --- python/branches/ssize_t/Lib/csv.py (original) +++ python/branches/ssize_t/Lib/csv.py Mon Jan 2 16:17:17 2006 @@ -152,10 +152,13 @@ quotechar, delimiter, skipinitialspace = \ self._guess_quote_and_delimiter(sample, delimiters) - if delimiter is None: + if not delimiter: delimiter, skipinitialspace = self._guess_delimiter(sample, delimiters) + if not delimiter: + raise Error, "Could not determine delimiter" + class dialect(Dialect): _name = "sniffed" lineterminator = '\r\n' @@ -271,7 +274,7 @@ for char in ascii: metaFrequency = charFrequency.get(char, {}) # must count even if frequency is 0 - freq = line.strip().count(char) + freq = line.count(char) # value is the mode metaFrequency[freq] = metaFrequency.get(freq, 0) + 1 charFrequency[char] = metaFrequency @@ -329,8 +332,12 @@ data[0].count("%c " % d)) return (d, skipinitialspace) - # finally, just return the first damn character in the list - delim = delims.keys()[0] + # nothing else indicates a preference, pick the character that + # dominates(?) 
+ items = [(v,k) for (k,v) in delims.items()] + items.sort() + delim = items[-1][1] + skipinitialspace = (data[0].count(delim) == data[0].count("%c " % delim)) return (delim, skipinitialspace) Modified: python/branches/ssize_t/Lib/encodings/cp037.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp037.py (original) +++ python/branches/ssize_t/Lib/encodings/cp037.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - 
u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xa2' # 0x4A -> CENT SIGN - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'|' # 0x4F -> VERTICAL LINE - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'!' 
# 0x5A -> EXCLAMATION MARK - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'\xac' # 0x5F -> NOT SIGN - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' # 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9D -> CEDILLA - u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xAF -> REGISTERED SIGN - u'^' # 0xB0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xBA -> LEFT SQUARE BRACKET - u']' # 0xBB -> RIGHT SQUARE 
BRACKET - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL 
LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> CONTROL + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> CONTROL + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> CONTROL + u'\x8d' # 0x09 -> CONTROL + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> CONTROL + u'\x85' # 0x15 -> CONTROL + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> CONTROL + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> CONTROL + u'\x81' # 0x21 -> CONTROL + u'\x82' # 0x22 -> CONTROL + u'\x83' # 0x23 -> CONTROL + u'\x84' # 0x24 -> CONTROL + u'\n' # 0x25 
-> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> CONTROL + u'\x89' # 0x29 -> CONTROL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> CONTROL + u'\x91' # 0x31 -> CONTROL + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> CONTROL + u'\x94' # 0x34 -> CONTROL + u'\x95' # 0x35 -> CONTROL + u'\x96' # 0x36 -> CONTROL + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> CONTROL + u'\x99' # 0x39 -> CONTROL + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\xa0' # 0x41 -> NO-BREAK SPACE + u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE + u'\xa2' # 0x4A -> CENT SIGN + u'.' 
# 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'|' # 0x4F -> VERTICAL LINE + u'&' # 0x50 -> AMPERSAND + u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE + u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE + u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'!' # 0x5A -> EXCLAMATION MARK + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'\xac' # 0x5F -> NOT SIGN + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xa6' # 0x6A -> BROKEN BAR + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' 
# 0x6F -> QUESTION MARK + u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE + u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE + u'`' # 0x79 -> GRAVE ACCENT + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK + u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) + u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) + u'\xb1' # 0x8F -> PLUS-MINUS SIGN + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9D -> CEDILLA + 
u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) + u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) + u'\xae' # 0xAF -> REGISTERED SIGN + u'^' # 0xB0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0xBA -> LEFT SQUARE BRACKET + u']' # 0xBB -> RIGHT SQUARE BRACKET + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH 
TILDE + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL ) ### 
Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x3F, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x5A, # EXCLAMATION MARK - 0x0022: 0x7F, # QUOTATION MARK - 0x0023: 0x7B, # NUMBER SIGN - 0x0024: 0x5B, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, 
# QUESTION MARK - 0x0040: 0x7C, # COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0xBA, # LEFT SQUARE BRACKET - 0x005C: 0xE0, # REVERSE SOLIDUS - 0x005D: 0xBB, # RIGHT SQUARE BRACKET - 0x005E: 0xB0, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x79, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # 
LATIN SMALL LETTER R - 0x0073: 0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0xC0, # LEFT CURLY BRACKET - 0x007C: 0x4F, # VERTICAL LINE - 0x007D: 0xD0, # RIGHT CURLY BRACKET - 0x007E: 0xA1, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # CONTROL - 0x0081: 0x21, # CONTROL - 0x0082: 0x22, # CONTROL - 0x0083: 0x23, # CONTROL - 0x0084: 0x24, # CONTROL - 0x0085: 0x15, # CONTROL - 0x0086: 0x06, # CONTROL - 0x0087: 0x17, # CONTROL - 0x0088: 0x28, # CONTROL - 0x0089: 0x29, # CONTROL - 0x008A: 0x2A, # CONTROL - 0x008B: 0x2B, # CONTROL - 0x008C: 0x2C, # CONTROL - 0x008D: 0x09, # CONTROL - 0x008E: 0x0A, # CONTROL - 0x008F: 0x1B, # CONTROL - 0x0090: 0x30, # CONTROL - 0x0091: 0x31, # CONTROL - 0x0092: 0x1A, # CONTROL - 0x0093: 0x33, # CONTROL - 0x0094: 0x34, # CONTROL - 0x0095: 0x35, # CONTROL - 0x0096: 0x36, # CONTROL - 0x0097: 0x08, # CONTROL - 0x0098: 0x38, # CONTROL - 0x0099: 0x39, # CONTROL - 0x009A: 0x3A, # CONTROL - 0x009B: 0x3B, # CONTROL - 0x009C: 0x04, # CONTROL - 0x009D: 0x14, # CONTROL - 0x009E: 0x3E, # CONTROL - 0x009F: 0xFF, # CONTROL - 0x00A0: 0x41, # NO-BREAK SPACE - 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK - 0x00A2: 0x4A, # CENT SIGN - 0x00A3: 0xB1, # POUND SIGN - 0x00A4: 0x9F, # CURRENCY SIGN - 0x00A5: 0xB2, # YEN SIGN - 0x00A6: 0x6A, # BROKEN BAR - 0x00A7: 0xB5, # SECTION SIGN - 0x00A8: 0xBD, # DIAERESIS - 0x00A9: 0xB4, # COPYRIGHT SIGN - 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR - 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0x5F, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00AE: 0xAF, # REGISTERED SIGN - 0x00AF: 0xBC, # MACRON - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0x8F, # PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 
0xBE, # ACUTE ACCENT - 0x00B5: 0xA0, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB3, # MIDDLE DOT - 0x00B8: 0x9D, # CEDILLA - 0x00B9: 0xDA, # SUPERSCRIPT ONE - 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF - 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xAB, # INVERTED QUESTION MARK - 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE - 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) - 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xBF, # MULTIPLICATION SIGN - 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 
0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) - 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE - 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) - 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xE1, # DIVISION SIGN - 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) - 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x37, # END OF TRANSMISSION + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, 
# ACKNOWLEDGE + 0x0007: 0x2F, # BELL + 0x0008: 0x16, # BACKSPACE + 0x0009: 0x05, # HORIZONTAL TABULATION + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x3C, # DEVICE CONTROL FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x32, # SYNCHRONOUS IDLE + 0x0017: 0x26, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x3F, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x40, # SPACE + 0x0021: 0x5A, # EXCLAMATION MARK + 0x0022: 0x7F, # QUOTATION MARK + 0x0023: 0x7B, # NUMBER SIGN + 0x0024: 0x5B, # DOLLAR SIGN + 0x0025: 0x6C, # PERCENT SIGN + 0x0026: 0x50, # AMPERSAND + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0x7C, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B + 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # 
LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, # LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 0x0050: 0xD7, # LATIN CAPITAL LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # LATIN CAPITAL LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0xBA, # LEFT SQUARE BRACKET + 0x005C: 0xE0, # REVERSE SOLIDUS + 0x005D: 0xBB, # RIGHT SQUARE BRACKET + 0x005E: 0xB0, # CIRCUMFLEX ACCENT + 0x005F: 0x6D, # LOW LINE + 0x0060: 0x79, # GRAVE ACCENT + 0x0061: 0x81, # LATIN SMALL LETTER A + 0x0062: 0x82, # LATIN SMALL LETTER B + 0x0063: 0x83, # LATIN SMALL LETTER C + 0x0064: 0x84, # LATIN SMALL LETTER D + 0x0065: 0x85, # LATIN SMALL LETTER E + 0x0066: 0x86, # LATIN SMALL LETTER F + 0x0067: 0x87, # LATIN SMALL LETTER G + 0x0068: 0x88, # LATIN SMALL LETTER H + 0x0069: 0x89, # LATIN SMALL LETTER I + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O + 0x0070: 0x97, # LATIN SMALL LETTER P + 0x0071: 0x98, # LATIN SMALL LETTER Q + 0x0072: 0x99, # LATIN SMALL LETTER R + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, # LATIN SMALL LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 
0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, # LATIN SMALL LETTER Z + 0x007B: 0xC0, # LEFT CURLY BRACKET + 0x007C: 0x4F, # VERTICAL LINE + 0x007D: 0xD0, # RIGHT CURLY BRACKET + 0x007E: 0xA1, # TILDE + 0x007F: 0x07, # DELETE + 0x0080: 0x20, # CONTROL + 0x0081: 0x21, # CONTROL + 0x0082: 0x22, # CONTROL + 0x0083: 0x23, # CONTROL + 0x0084: 0x24, # CONTROL + 0x0085: 0x15, # CONTROL + 0x0086: 0x06, # CONTROL + 0x0087: 0x17, # CONTROL + 0x0088: 0x28, # CONTROL + 0x0089: 0x29, # CONTROL + 0x008A: 0x2A, # CONTROL + 0x008B: 0x2B, # CONTROL + 0x008C: 0x2C, # CONTROL + 0x008D: 0x09, # CONTROL + 0x008E: 0x0A, # CONTROL + 0x008F: 0x1B, # CONTROL + 0x0090: 0x30, # CONTROL + 0x0091: 0x31, # CONTROL + 0x0092: 0x1A, # CONTROL + 0x0093: 0x33, # CONTROL + 0x0094: 0x34, # CONTROL + 0x0095: 0x35, # CONTROL + 0x0096: 0x36, # CONTROL + 0x0097: 0x08, # CONTROL + 0x0098: 0x38, # CONTROL + 0x0099: 0x39, # CONTROL + 0x009A: 0x3A, # CONTROL + 0x009B: 0x3B, # CONTROL + 0x009C: 0x04, # CONTROL + 0x009D: 0x14, # CONTROL + 0x009E: 0x3E, # CONTROL + 0x009F: 0xFF, # CONTROL + 0x00A0: 0x41, # NO-BREAK SPACE + 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK + 0x00A2: 0x4A, # CENT SIGN + 0x00A3: 0xB1, # POUND SIGN + 0x00A4: 0x9F, # CURRENCY SIGN + 0x00A5: 0xB2, # YEN SIGN + 0x00A6: 0x6A, # BROKEN BAR + 0x00A7: 0xB5, # SECTION SIGN + 0x00A8: 0xBD, # DIAERESIS + 0x00A9: 0xB4, # COPYRIGHT SIGN + 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR + 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0x5F, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00AE: 0xAF, # REGISTERED SIGN + 0x00AF: 0xBC, # MACRON + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0x8F, # PLUS-MINUS SIGN + 0x00B2: 0xEA, # SUPERSCRIPT TWO + 0x00B3: 0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xBE, # ACUTE ACCENT + 0x00B5: 0xA0, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB3, # MIDDLE DOT + 0x00B8: 0x9D, # CEDILLA + 0x00B9: 0xDA, # SUPERSCRIPT ONE + 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR + 0x00BB: 
0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF + 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xAB, # INVERTED QUESTION MARK + 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE + 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) + 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xBF, # MULTIPLICATION SIGN + 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) + 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 
0x45, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE + 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xE1, # DIVISION SIGN + 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS } - Modified: python/branches/ssize_t/Lib/encodings/cp1006.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1006.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1006.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class 
StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,521 +32,520 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u06f0' # 0xA1 -> EXTENDED ARABIC-INDIC DIGIT ZERO - u'\u06f1' # 0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE - u'\u06f2' # 0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO - u'\u06f3' # 0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE - u'\u06f4' # 0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR - u'\u06f5' # 0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE - u'\u06f6' # 0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX - u'\u06f7' # 0xA8 -> 
EXTENDED ARABIC-INDIC DIGIT SEVEN - u'\u06f8' # 0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT - u'\u06f9' # 0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE - u'\u060c' # 0xAB -> ARABIC COMMA - u'\u061b' # 0xAC -> ARABIC SEMICOLON - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u061f' # 0xAE -> ARABIC QUESTION MARK - u'\ufe81' # 0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - u'\ufe8d' # 0xB0 -> ARABIC LETTER ALEF ISOLATED FORM - u'\ufe8e' # 0xB1 -> ARABIC LETTER ALEF FINAL FORM - u'\ufe8e' # 0xB2 -> ARABIC LETTER ALEF FINAL FORM - u'\ufe8f' # 0xB3 -> ARABIC LETTER BEH ISOLATED FORM - u'\ufe91' # 0xB4 -> ARABIC LETTER BEH INITIAL FORM - u'\ufb56' # 0xB5 -> ARABIC LETTER PEH ISOLATED FORM - u'\ufb58' # 0xB6 -> ARABIC LETTER PEH INITIAL FORM - u'\ufe93' # 0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM - u'\ufe95' # 0xB8 -> ARABIC LETTER TEH ISOLATED FORM - u'\ufe97' # 0xB9 -> ARABIC LETTER TEH INITIAL FORM - u'\ufb66' # 0xBA -> ARABIC LETTER TTEH ISOLATED FORM - u'\ufb68' # 0xBB -> ARABIC LETTER TTEH INITIAL FORM - u'\ufe99' # 0xBC -> ARABIC LETTER THEH ISOLATED FORM - u'\ufe9b' # 0xBD -> ARABIC LETTER THEH INITIAL FORM - u'\ufe9d' # 0xBE -> ARABIC LETTER JEEM ISOLATED FORM - u'\ufe9f' # 0xBF -> ARABIC LETTER JEEM INITIAL FORM - u'\ufb7a' # 0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM - u'\ufb7c' # 0xC1 -> ARABIC LETTER TCHEH INITIAL FORM - u'\ufea1' # 0xC2 -> ARABIC LETTER HAH ISOLATED FORM - u'\ufea3' # 0xC3 -> ARABIC LETTER HAH INITIAL FORM - u'\ufea5' # 0xC4 -> ARABIC LETTER KHAH ISOLATED FORM - u'\ufea7' # 0xC5 -> ARABIC LETTER KHAH INITIAL FORM - u'\ufea9' # 0xC6 -> ARABIC LETTER DAL ISOLATED FORM - u'\ufb84' # 0xC7 -> ARABIC LETTER DAHAL ISOLATED FORMN - u'\ufeab' # 0xC8 -> ARABIC LETTER THAL ISOLATED FORM - u'\ufead' # 0xC9 -> ARABIC LETTER REH ISOLATED FORM - u'\ufb8c' # 0xCA -> ARABIC LETTER RREH ISOLATED FORM - u'\ufeaf' # 0xCB -> ARABIC LETTER ZAIN ISOLATED FORM - u'\ufb8a' # 0xCC -> ARABIC LETTER JEH ISOLATED FORM - u'\ufeb1' # 0xCD -> ARABIC LETTER SEEN ISOLATED 
FORM - u'\ufeb3' # 0xCE -> ARABIC LETTER SEEN INITIAL FORM - u'\ufeb5' # 0xCF -> ARABIC LETTER SHEEN ISOLATED FORM - u'\ufeb7' # 0xD0 -> ARABIC LETTER SHEEN INITIAL FORM - u'\ufeb9' # 0xD1 -> ARABIC LETTER SAD ISOLATED FORM - u'\ufebb' # 0xD2 -> ARABIC LETTER SAD INITIAL FORM - u'\ufebd' # 0xD3 -> ARABIC LETTER DAD ISOLATED FORM - u'\ufebf' # 0xD4 -> ARABIC LETTER DAD INITIAL FORM - u'\ufec1' # 0xD5 -> ARABIC LETTER TAH ISOLATED FORM - u'\ufec5' # 0xD6 -> ARABIC LETTER ZAH ISOLATED FORM - u'\ufec9' # 0xD7 -> ARABIC LETTER AIN ISOLATED FORM - u'\ufeca' # 0xD8 -> ARABIC LETTER AIN FINAL FORM - u'\ufecb' # 0xD9 -> ARABIC LETTER AIN INITIAL FORM - u'\ufecc' # 0xDA -> ARABIC LETTER AIN MEDIAL FORM - u'\ufecd' # 0xDB -> ARABIC LETTER GHAIN ISOLATED FORM - u'\ufece' # 0xDC -> ARABIC LETTER GHAIN FINAL FORM - u'\ufecf' # 0xDD -> ARABIC LETTER GHAIN INITIAL FORM - u'\ufed0' # 0xDE -> ARABIC LETTER GHAIN MEDIAL FORM - u'\ufed1' # 0xDF -> ARABIC LETTER FEH ISOLATED FORM - u'\ufed3' # 0xE0 -> ARABIC LETTER FEH INITIAL FORM - u'\ufed5' # 0xE1 -> ARABIC LETTER QAF ISOLATED FORM - u'\ufed7' # 0xE2 -> ARABIC LETTER QAF INITIAL FORM - u'\ufed9' # 0xE3 -> ARABIC LETTER KAF ISOLATED FORM - u'\ufedb' # 0xE4 -> ARABIC LETTER KAF INITIAL FORM - u'\ufb92' # 0xE5 -> ARABIC LETTER GAF ISOLATED FORM - u'\ufb94' # 0xE6 -> ARABIC LETTER GAF INITIAL FORM - u'\ufedd' # 0xE7 -> ARABIC LETTER LAM ISOLATED FORM - u'\ufedf' # 0xE8 -> ARABIC LETTER LAM INITIAL FORM - u'\ufee0' # 0xE9 -> ARABIC LETTER LAM MEDIAL FORM - u'\ufee1' # 0xEA -> ARABIC LETTER MEEM ISOLATED FORM - u'\ufee3' # 0xEB -> ARABIC LETTER MEEM INITIAL FORM - u'\ufb9e' # 0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM - u'\ufee5' # 0xED -> ARABIC LETTER NOON ISOLATED FORM - u'\ufee7' # 0xEE -> ARABIC LETTER NOON INITIAL FORM - u'\ufe85' # 0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - u'\ufeed' # 0xF0 -> ARABIC LETTER WAW ISOLATED FORM - u'\ufba6' # 0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM - u'\ufba8' # 0xF2 -> ARABIC 
LETTER HEH GOAL INITIAL FORM - u'\ufba9' # 0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM - u'\ufbaa' # 0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM - u'\ufe80' # 0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM - u'\ufe89' # 0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM - u'\ufe8a' # 0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM - u'\ufe8b' # 0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - u'\ufef1' # 0xF9 -> ARABIC LETTER YEH ISOLATED FORM - u'\ufef2' # 0xFA -> ARABIC LETTER YEH FINAL FORM - u'\ufef3' # 0xFB -> ARABIC LETTER YEH INITIAL FORM - u'\ufbb0' # 0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM - u'\ufbae' # 0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM - u'\ufe7c' # 0xFE -> ARABIC SHADDA ISOLATED FORM - u'\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' 
# 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + 
u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u06f0' # 0xA1 -> 
EXTENDED ARABIC-INDIC DIGIT ZERO + u'\u06f1' # 0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE + u'\u06f2' # 0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO + u'\u06f3' # 0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE + u'\u06f4' # 0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR + u'\u06f5' # 0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE + u'\u06f6' # 0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX + u'\u06f7' # 0xA8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN + u'\u06f8' # 0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT + u'\u06f9' # 0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE + u'\u060c' # 0xAB -> ARABIC COMMA + u'\u061b' # 0xAC -> ARABIC SEMICOLON + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u061f' # 0xAE -> ARABIC QUESTION MARK + u'\ufe81' # 0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufe8d' # 0xB0 -> ARABIC LETTER ALEF ISOLATED FORM + u'\ufe8e' # 0xB1 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8e' # 0xB2 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8f' # 0xB3 -> ARABIC LETTER BEH ISOLATED FORM + u'\ufe91' # 0xB4 -> ARABIC LETTER BEH INITIAL FORM + u'\ufb56' # 0xB5 -> ARABIC LETTER PEH ISOLATED FORM + u'\ufb58' # 0xB6 -> ARABIC LETTER PEH INITIAL FORM + u'\ufe93' # 0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM + u'\ufe95' # 0xB8 -> ARABIC LETTER TEH ISOLATED FORM + u'\ufe97' # 0xB9 -> ARABIC LETTER TEH INITIAL FORM + u'\ufb66' # 0xBA -> ARABIC LETTER TTEH ISOLATED FORM + u'\ufb68' # 0xBB -> ARABIC LETTER TTEH INITIAL FORM + u'\ufe99' # 0xBC -> ARABIC LETTER THEH ISOLATED FORM + u'\ufe9b' # 0xBD -> ARABIC LETTER THEH INITIAL FORM + u'\ufe9d' # 0xBE -> ARABIC LETTER JEEM ISOLATED FORM + u'\ufe9f' # 0xBF -> ARABIC LETTER JEEM INITIAL FORM + u'\ufb7a' # 0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM + u'\ufb7c' # 0xC1 -> ARABIC LETTER TCHEH INITIAL FORM + u'\ufea1' # 0xC2 -> ARABIC LETTER HAH ISOLATED FORM + u'\ufea3' # 0xC3 -> ARABIC LETTER HAH INITIAL FORM + u'\ufea5' # 0xC4 -> ARABIC LETTER KHAH ISOLATED FORM + u'\ufea7' # 0xC5 -> ARABIC LETTER KHAH INITIAL FORM + u'\ufea9' # 0xC6 -> ARABIC LETTER DAL ISOLATED FORM 
+ u'\ufb84' # 0xC7 -> ARABIC LETTER DAHAL ISOLATED FORMN + u'\ufeab' # 0xC8 -> ARABIC LETTER THAL ISOLATED FORM + u'\ufead' # 0xC9 -> ARABIC LETTER REH ISOLATED FORM + u'\ufb8c' # 0xCA -> ARABIC LETTER RREH ISOLATED FORM + u'\ufeaf' # 0xCB -> ARABIC LETTER ZAIN ISOLATED FORM + u'\ufb8a' # 0xCC -> ARABIC LETTER JEH ISOLATED FORM + u'\ufeb1' # 0xCD -> ARABIC LETTER SEEN ISOLATED FORM + u'\ufeb3' # 0xCE -> ARABIC LETTER SEEN INITIAL FORM + u'\ufeb5' # 0xCF -> ARABIC LETTER SHEEN ISOLATED FORM + u'\ufeb7' # 0xD0 -> ARABIC LETTER SHEEN INITIAL FORM + u'\ufeb9' # 0xD1 -> ARABIC LETTER SAD ISOLATED FORM + u'\ufebb' # 0xD2 -> ARABIC LETTER SAD INITIAL FORM + u'\ufebd' # 0xD3 -> ARABIC LETTER DAD ISOLATED FORM + u'\ufebf' # 0xD4 -> ARABIC LETTER DAD INITIAL FORM + u'\ufec1' # 0xD5 -> ARABIC LETTER TAH ISOLATED FORM + u'\ufec5' # 0xD6 -> ARABIC LETTER ZAH ISOLATED FORM + u'\ufec9' # 0xD7 -> ARABIC LETTER AIN ISOLATED FORM + u'\ufeca' # 0xD8 -> ARABIC LETTER AIN FINAL FORM + u'\ufecb' # 0xD9 -> ARABIC LETTER AIN INITIAL FORM + u'\ufecc' # 0xDA -> ARABIC LETTER AIN MEDIAL FORM + u'\ufecd' # 0xDB -> ARABIC LETTER GHAIN ISOLATED FORM + u'\ufece' # 0xDC -> ARABIC LETTER GHAIN FINAL FORM + u'\ufecf' # 0xDD -> ARABIC LETTER GHAIN INITIAL FORM + u'\ufed0' # 0xDE -> ARABIC LETTER GHAIN MEDIAL FORM + u'\ufed1' # 0xDF -> ARABIC LETTER FEH ISOLATED FORM + u'\ufed3' # 0xE0 -> ARABIC LETTER FEH INITIAL FORM + u'\ufed5' # 0xE1 -> ARABIC LETTER QAF ISOLATED FORM + u'\ufed7' # 0xE2 -> ARABIC LETTER QAF INITIAL FORM + u'\ufed9' # 0xE3 -> ARABIC LETTER KAF ISOLATED FORM + u'\ufedb' # 0xE4 -> ARABIC LETTER KAF INITIAL FORM + u'\ufb92' # 0xE5 -> ARABIC LETTER GAF ISOLATED FORM + u'\ufb94' # 0xE6 -> ARABIC LETTER GAF INITIAL FORM + u'\ufedd' # 0xE7 -> ARABIC LETTER LAM ISOLATED FORM + u'\ufedf' # 0xE8 -> ARABIC LETTER LAM INITIAL FORM + u'\ufee0' # 0xE9 -> ARABIC LETTER LAM MEDIAL FORM + u'\ufee1' # 0xEA -> ARABIC LETTER MEEM ISOLATED FORM + u'\ufee3' # 0xEB -> ARABIC LETTER MEEM INITIAL FORM + 
u'\ufb9e' # 0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM + u'\ufee5' # 0xED -> ARABIC LETTER NOON ISOLATED FORM + u'\ufee7' # 0xEE -> ARABIC LETTER NOON INITIAL FORM + u'\ufe85' # 0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + u'\ufeed' # 0xF0 -> ARABIC LETTER WAW ISOLATED FORM + u'\ufba6' # 0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM + u'\ufba8' # 0xF2 -> ARABIC LETTER HEH GOAL INITIAL FORM + u'\ufba9' # 0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM + u'\ufbaa' # 0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + u'\ufe80' # 0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM + u'\ufe89' # 0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM + u'\ufe8a' # 0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + u'\ufe8b' # 0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + u'\ufef1' # 0xF9 -> ARABIC LETTER YEH ISOLATED FORM + u'\ufef2' # 0xFA -> ARABIC LETTER YEH FINAL FORM + u'\ufef3' # 0xFB -> ARABIC LETTER YEH INITIAL FORM + u'\ufbb0' # 0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + u'\ufbae' # 0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM + u'\ufe7c' # 0xFE -> ARABIC SHADDA ISOLATED FORM + u'\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END 
OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 
0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # 
- 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00AD: 0xAD, # SOFT HYPHEN - 0x060C: 0xAB, # ARABIC COMMA - 0x061B: 0xAC, # ARABIC SEMICOLON - 0x061F: 0xAE, # ARABIC QUESTION MARK - 0x06F0: 0xA1, # EXTENDED ARABIC-INDIC DIGIT ZERO - 0x06F1: 0xA2, # EXTENDED ARABIC-INDIC DIGIT ONE - 0x06F2: 0xA3, # EXTENDED ARABIC-INDIC DIGIT TWO - 0x06F3: 0xA4, # EXTENDED ARABIC-INDIC DIGIT THREE - 0x06F4: 0xA5, # EXTENDED ARABIC-INDIC DIGIT FOUR - 0x06F5: 0xA6, # EXTENDED ARABIC-INDIC DIGIT FIVE - 0x06F6: 0xA7, # EXTENDED ARABIC-INDIC DIGIT SIX - 0x06F7: 0xA8, # EXTENDED ARABIC-INDIC DIGIT SEVEN - 0x06F8: 0xA9, # EXTENDED ARABIC-INDIC DIGIT EIGHT - 0x06F9: 0xAA, # EXTENDED ARABIC-INDIC DIGIT NINE - 0xFB56: 0xB5, # ARABIC LETTER PEH ISOLATED FORM - 0xFB58: 0xB6, # ARABIC LETTER PEH INITIAL FORM - 0xFB66: 0xBA, # ARABIC LETTER TTEH ISOLATED FORM - 0xFB68: 0xBB, # ARABIC LETTER TTEH INITIAL FORM - 0xFB7A: 0xC0, # ARABIC LETTER TCHEH ISOLATED FORM - 0xFB7C: 0xC1, # ARABIC LETTER TCHEH INITIAL FORM - 0xFB84: 0xC7, # ARABIC LETTER DAHAL ISOLATED FORMN - 0xFB8A: 0xCC, # ARABIC LETTER JEH ISOLATED FORM - 0xFB8C: 0xCA, # ARABIC LETTER RREH ISOLATED FORM - 0xFB92: 0xE5, # ARABIC LETTER GAF ISOLATED FORM - 0xFB94: 0xE6, # ARABIC LETTER GAF INITIAL FORM - 0xFB9E: 0xEC, # ARABIC LETTER NOON GHUNNA ISOLATED FORM - 0xFBA6: 0xF1, # ARABIC LETTER HEH GOAL ISOLATED FORM - 0xFBA8: 0xF2, # ARABIC LETTER HEH GOAL INITIAL FORM - 0xFBA9: 0xF3, # ARABIC LETTER HEH GOAL MEDIAL FORM - 0xFBAA: 0xF4, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM - 0xFBAE: 0xFD, # ARABIC LETTER YEH BARREE ISOLATED FORM - 0xFBB0: 0xFC, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM - 0xFE7C: 0xFE, # ARABIC SHADDA ISOLATED FORM - 0xFE7D: 0xFF, # ARABIC SHADDA 
MEDIAL FORM - 0xFE80: 0xF5, # ARABIC LETTER HAMZA ISOLATED FORM - 0xFE81: 0xAF, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0xFE85: 0xEF, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0xFE89: 0xF6, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM - 0xFE8A: 0xF7, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM - 0xFE8B: 0xF8, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0xFE8D: 0xB0, # ARABIC LETTER ALEF ISOLATED FORM - 0xFE8E: None, # ARABIC LETTER ALEF FINAL FORM - 0xFE8F: 0xB3, # ARABIC LETTER BEH ISOLATED FORM - 0xFE91: 0xB4, # ARABIC LETTER BEH INITIAL FORM - 0xFE93: 0xB7, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0xFE95: 0xB8, # ARABIC LETTER TEH ISOLATED FORM - 0xFE97: 0xB9, # ARABIC LETTER TEH INITIAL FORM - 0xFE99: 0xBC, # ARABIC LETTER THEH ISOLATED FORM - 0xFE9B: 0xBD, # ARABIC LETTER THEH INITIAL FORM - 0xFE9D: 0xBE, # ARABIC LETTER JEEM ISOLATED FORM - 0xFE9F: 0xBF, # ARABIC LETTER JEEM INITIAL FORM - 0xFEA1: 0xC2, # ARABIC LETTER HAH ISOLATED FORM - 0xFEA3: 0xC3, # ARABIC LETTER HAH INITIAL FORM - 0xFEA5: 0xC4, # ARABIC LETTER KHAH ISOLATED FORM - 0xFEA7: 0xC5, # ARABIC LETTER KHAH INITIAL FORM - 0xFEA9: 0xC6, # ARABIC LETTER DAL ISOLATED FORM - 0xFEAB: 0xC8, # ARABIC LETTER THAL ISOLATED FORM - 0xFEAD: 0xC9, # ARABIC LETTER REH ISOLATED FORM - 0xFEAF: 0xCB, # ARABIC LETTER ZAIN ISOLATED FORM - 0xFEB1: 0xCD, # ARABIC LETTER SEEN ISOLATED FORM - 0xFEB3: 0xCE, # ARABIC LETTER SEEN INITIAL FORM - 0xFEB5: 0xCF, # ARABIC LETTER SHEEN ISOLATED FORM - 0xFEB7: 0xD0, # ARABIC LETTER SHEEN INITIAL FORM - 0xFEB9: 0xD1, # ARABIC LETTER SAD ISOLATED FORM - 0xFEBB: 0xD2, # ARABIC LETTER SAD INITIAL FORM - 0xFEBD: 0xD3, # ARABIC LETTER DAD ISOLATED FORM - 0xFEBF: 0xD4, # ARABIC LETTER DAD INITIAL FORM - 0xFEC1: 0xD5, # ARABIC LETTER TAH ISOLATED FORM - 0xFEC5: 0xD6, # ARABIC LETTER ZAH ISOLATED FORM - 0xFEC9: 0xD7, # ARABIC LETTER AIN ISOLATED FORM - 0xFECA: 0xD8, # ARABIC LETTER AIN FINAL FORM - 0xFECB: 0xD9, # ARABIC LETTER AIN INITIAL 
FORM - 0xFECC: 0xDA, # ARABIC LETTER AIN MEDIAL FORM - 0xFECD: 0xDB, # ARABIC LETTER GHAIN ISOLATED FORM - 0xFECE: 0xDC, # ARABIC LETTER GHAIN FINAL FORM - 0xFECF: 0xDD, # ARABIC LETTER GHAIN INITIAL FORM - 0xFED0: 0xDE, # ARABIC LETTER GHAIN MEDIAL FORM - 0xFED1: 0xDF, # ARABIC LETTER FEH ISOLATED FORM - 0xFED3: 0xE0, # ARABIC LETTER FEH INITIAL FORM - 0xFED5: 0xE1, # ARABIC LETTER QAF ISOLATED FORM - 0xFED7: 0xE2, # ARABIC LETTER QAF INITIAL FORM - 0xFED9: 0xE3, # ARABIC LETTER KAF ISOLATED FORM - 0xFEDB: 0xE4, # ARABIC LETTER KAF INITIAL FORM - 0xFEDD: 0xE7, # ARABIC LETTER LAM ISOLATED FORM - 0xFEDF: 0xE8, # ARABIC LETTER LAM INITIAL FORM - 0xFEE0: 0xE9, # ARABIC LETTER LAM MEDIAL FORM - 0xFEE1: 0xEA, # ARABIC LETTER MEEM ISOLATED FORM - 0xFEE3: 0xEB, # ARABIC LETTER MEEM INITIAL FORM - 0xFEE5: 0xED, # ARABIC LETTER NOON ISOLATED FORM - 0xFEE7: 0xEE, # ARABIC LETTER NOON INITIAL FORM - 0xFEED: 0xF0, # ARABIC LETTER WAW ISOLATED FORM - 0xFEF1: 0xF9, # ARABIC LETTER YEH ISOLATED FORM - 0xFEF2: 0xFA, # ARABIC LETTER YEH FINAL FORM - 0xFEF3: 0xFB, # ARABIC LETTER YEH INITIAL FORM + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE 
+ 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN 
CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 
0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00AD: 0xAD, # SOFT HYPHEN + 0x060C: 0xAB, # ARABIC COMMA + 0x061B: 0xAC, # ARABIC SEMICOLON + 0x061F: 0xAE, # ARABIC QUESTION MARK + 0x06F0: 0xA1, # EXTENDED ARABIC-INDIC DIGIT ZERO + 0x06F1: 0xA2, # EXTENDED ARABIC-INDIC DIGIT ONE + 0x06F2: 0xA3, # EXTENDED ARABIC-INDIC DIGIT TWO + 0x06F3: 0xA4, # EXTENDED ARABIC-INDIC DIGIT THREE + 0x06F4: 0xA5, # EXTENDED ARABIC-INDIC DIGIT FOUR + 0x06F5: 0xA6, # EXTENDED ARABIC-INDIC DIGIT FIVE + 0x06F6: 0xA7, # EXTENDED ARABIC-INDIC DIGIT SIX + 0x06F7: 0xA8, # EXTENDED ARABIC-INDIC DIGIT SEVEN + 0x06F8: 0xA9, # EXTENDED ARABIC-INDIC DIGIT EIGHT + 0x06F9: 0xAA, # EXTENDED ARABIC-INDIC DIGIT NINE + 0xFB56: 0xB5, # ARABIC LETTER PEH ISOLATED FORM + 0xFB58: 0xB6, # ARABIC LETTER PEH INITIAL FORM + 0xFB66: 0xBA, # ARABIC LETTER TTEH ISOLATED FORM + 0xFB68: 0xBB, # ARABIC LETTER TTEH INITIAL FORM + 0xFB7A: 0xC0, # ARABIC LETTER TCHEH ISOLATED FORM + 0xFB7C: 0xC1, # ARABIC LETTER TCHEH INITIAL FORM + 0xFB84: 0xC7, # ARABIC LETTER DAHAL ISOLATED FORMN + 0xFB8A: 0xCC, # ARABIC LETTER JEH ISOLATED FORM + 0xFB8C: 0xCA, # ARABIC LETTER RREH ISOLATED FORM + 0xFB92: 0xE5, # ARABIC LETTER GAF ISOLATED FORM + 0xFB94: 0xE6, # ARABIC LETTER GAF INITIAL FORM + 0xFB9E: 0xEC, # ARABIC LETTER NOON GHUNNA ISOLATED FORM + 0xFBA6: 0xF1, # ARABIC LETTER HEH GOAL ISOLATED FORM + 0xFBA8: 0xF2, # ARABIC LETTER HEH GOAL INITIAL FORM + 0xFBA9: 0xF3, # ARABIC LETTER HEH GOAL MEDIAL FORM + 0xFBAA: 0xF4, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + 0xFBAE: 0xFD, # ARABIC LETTER YEH BARREE ISOLATED FORM + 0xFBB0: 0xFC, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + 0xFE7C: 0xFE, # ARABIC SHADDA ISOLATED FORM + 0xFE7D: 0xFF, # ARABIC SHADDA MEDIAL FORM + 0xFE80: 0xF5, # ARABIC LETTER HAMZA ISOLATED FORM + 0xFE81: 0xAF, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 
0xFE85: 0xEF, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0xFE89: 0xF6, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM + 0xFE8A: 0xF7, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + 0xFE8B: 0xF8, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0xFE8D: 0xB0, # ARABIC LETTER ALEF ISOLATED FORM + 0xFE8E: None, # ARABIC LETTER ALEF FINAL FORM + 0xFE8F: 0xB3, # ARABIC LETTER BEH ISOLATED FORM + 0xFE91: 0xB4, # ARABIC LETTER BEH INITIAL FORM + 0xFE93: 0xB7, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0xFE95: 0xB8, # ARABIC LETTER TEH ISOLATED FORM + 0xFE97: 0xB9, # ARABIC LETTER TEH INITIAL FORM + 0xFE99: 0xBC, # ARABIC LETTER THEH ISOLATED FORM + 0xFE9B: 0xBD, # ARABIC LETTER THEH INITIAL FORM + 0xFE9D: 0xBE, # ARABIC LETTER JEEM ISOLATED FORM + 0xFE9F: 0xBF, # ARABIC LETTER JEEM INITIAL FORM + 0xFEA1: 0xC2, # ARABIC LETTER HAH ISOLATED FORM + 0xFEA3: 0xC3, # ARABIC LETTER HAH INITIAL FORM + 0xFEA5: 0xC4, # ARABIC LETTER KHAH ISOLATED FORM + 0xFEA7: 0xC5, # ARABIC LETTER KHAH INITIAL FORM + 0xFEA9: 0xC6, # ARABIC LETTER DAL ISOLATED FORM + 0xFEAB: 0xC8, # ARABIC LETTER THAL ISOLATED FORM + 0xFEAD: 0xC9, # ARABIC LETTER REH ISOLATED FORM + 0xFEAF: 0xCB, # ARABIC LETTER ZAIN ISOLATED FORM + 0xFEB1: 0xCD, # ARABIC LETTER SEEN ISOLATED FORM + 0xFEB3: 0xCE, # ARABIC LETTER SEEN INITIAL FORM + 0xFEB5: 0xCF, # ARABIC LETTER SHEEN ISOLATED FORM + 0xFEB7: 0xD0, # ARABIC LETTER SHEEN INITIAL FORM + 0xFEB9: 0xD1, # ARABIC LETTER SAD ISOLATED FORM + 0xFEBB: 0xD2, # ARABIC LETTER SAD INITIAL FORM + 0xFEBD: 0xD3, # ARABIC LETTER DAD ISOLATED FORM + 0xFEBF: 0xD4, # ARABIC LETTER DAD INITIAL FORM + 0xFEC1: 0xD5, # ARABIC LETTER TAH ISOLATED FORM + 0xFEC5: 0xD6, # ARABIC LETTER ZAH ISOLATED FORM + 0xFEC9: 0xD7, # ARABIC LETTER AIN ISOLATED FORM + 0xFECA: 0xD8, # ARABIC LETTER AIN FINAL FORM + 0xFECB: 0xD9, # ARABIC LETTER AIN INITIAL FORM + 0xFECC: 0xDA, # ARABIC LETTER AIN MEDIAL FORM + 0xFECD: 0xDB, # ARABIC LETTER GHAIN ISOLATED FORM + 0xFECE: 0xDC, # ARABIC 
LETTER GHAIN FINAL FORM + 0xFECF: 0xDD, # ARABIC LETTER GHAIN INITIAL FORM + 0xFED0: 0xDE, # ARABIC LETTER GHAIN MEDIAL FORM + 0xFED1: 0xDF, # ARABIC LETTER FEH ISOLATED FORM + 0xFED3: 0xE0, # ARABIC LETTER FEH INITIAL FORM + 0xFED5: 0xE1, # ARABIC LETTER QAF ISOLATED FORM + 0xFED7: 0xE2, # ARABIC LETTER QAF INITIAL FORM + 0xFED9: 0xE3, # ARABIC LETTER KAF ISOLATED FORM + 0xFEDB: 0xE4, # ARABIC LETTER KAF INITIAL FORM + 0xFEDD: 0xE7, # ARABIC LETTER LAM ISOLATED FORM + 0xFEDF: 0xE8, # ARABIC LETTER LAM INITIAL FORM + 0xFEE0: 0xE9, # ARABIC LETTER LAM MEDIAL FORM + 0xFEE1: 0xEA, # ARABIC LETTER MEEM ISOLATED FORM + 0xFEE3: 0xEB, # ARABIC LETTER MEEM INITIAL FORM + 0xFEE5: 0xED, # ARABIC LETTER NOON ISOLATED FORM + 0xFEE7: 0xEE, # ARABIC LETTER NOON INITIAL FORM + 0xFEED: 0xF0, # ARABIC LETTER WAW ISOLATED FORM + 0xFEF1: 0xF9, # ARABIC LETTER YEH ISOLATED FORM + 0xFEF2: 0xFA, # ARABIC LETTER YEH FINAL FORM + 0xFEF3: 0xFB, # ARABIC LETTER YEH INITIAL FORM } - Modified: python/branches/ssize_t/Lib/encodings/cp1026.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1026.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1026.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> 
DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'{' # 0x48 -> LEFT CURLY BRACKET - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xc7' # 0x4A -> LATIN CAPITAL LETTER C WITH 
CEDILLA - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'!' # 0x4F -> EXCLAMATION MARK - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'\u011e' # 0x5A -> LATIN CAPITAL LETTER G WITH BREVE - u'\u0130' # 0x5B -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'^' # 0x5F -> CIRCUMFLEX ACCENT - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'[' # 0x68 -> LEFT SQUARE BRACKET - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\u015f' # 0x6A -> LATIN SMALL LETTER S WITH CEDILLA - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' 
# 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'\u0131' # 0x79 -> LATIN SMALL LETTER DOTLESS I - u':' # 0x7A -> COLON - u'\xd6' # 0x7B -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u015e' # 0x7C -> LATIN CAPITAL LETTER S WITH CEDILLA - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'\xdc' # 0x7F -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'}' # 0x8C -> RIGHT CURLY BRACKET - u'`' # 0x8D -> GRAVE ACCENT - u'\xa6' # 0x8E -> BROKEN BAR - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE 
AE - u'\xb8' # 0x9D -> CEDILLA - u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'\xf6' # 0xA1 -> LATIN SMALL LETTER O WITH DIAERESIS - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u']' # 0xAC -> RIGHT SQUARE BRACKET - u'$' # 0xAD -> DOLLAR SIGN - u'@' # 0xAE -> COMMERCIAL AT - u'\xae' # 0xAF -> REGISTERED SIGN - u'\xa2' # 0xB0 -> CENT SIGN - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'\xac' # 0xBA -> NOT SIGN - u'|' # 0xBB -> VERTICAL LINE - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'\xe7' # 0xC0 -> LATIN SMALL LETTER C WITH CEDILLA - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'~' # 0xCC -> TILDE - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE - u'\u011f' # 0xD0 -> LATIN SMALL 
LETTER G WITH BREVE - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\\' # 0xDC -> REVERSE SOLIDUS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xfc' # 0xE0 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'#' # 0xEC -> NUMBER SIGN - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'"' # 0xFC -> QUOTATION MARK - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF 
TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> CONTROL + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> CONTROL + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> CONTROL + u'\x8d' # 0x09 -> CONTROL + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> CONTROL + u'\x85' # 0x15 -> CONTROL + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> CONTROL + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> CONTROL + u'\x81' # 0x21 -> CONTROL + u'\x82' # 0x22 -> CONTROL + u'\x83' # 0x23 -> CONTROL + u'\x84' # 0x24 -> CONTROL + u'\n' # 0x25 -> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> CONTROL + u'\x89' # 0x29 -> CONTROL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> CONTROL + u'\x91' # 0x31 -> CONTROL + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> CONTROL + u'\x94' # 0x34 -> CONTROL + u'\x95' # 0x35 -> CONTROL + u'\x96' # 0x36 -> CONTROL + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> CONTROL + u'\x99' # 0x39 -> CONTROL + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\xa0' # 0x41 -> NO-BREAK SPACE + u'\xe2' # 0x42 -> LATIN SMALL LETTER A 
WITH CIRCUMFLEX + u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE + u'{' # 0x48 -> LEFT CURLY BRACKET + u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE + u'\xc7' # 0x4A -> LATIN CAPITAL LETTER C WITH CEDILLA + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'!' # 0x4F -> EXCLAMATION MARK + u'&' # 0x50 -> AMPERSAND + u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE + u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE + u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'\u011e' # 0x5A -> LATIN CAPITAL LETTER G WITH BREVE + u'\u0130' # 0x5B -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'^' # 0x5F -> CIRCUMFLEX ACCENT + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'[' # 0x68 -> LEFT SQUARE BRACKET + u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE + u'\u015f' # 0x6A -> LATIN SMALL LETTER S WITH CEDILLA + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' 
# 0x6F -> QUESTION MARK + u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE + u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE + u'\u0131' # 0x79 -> LATIN SMALL LETTER DOTLESS I + u':' # 0x7A -> COLON + u'\xd6' # 0x7B -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u015e' # 0x7C -> LATIN CAPITAL LETTER S WITH CEDILLA + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'\xdc' # 0x7F -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'}' # 0x8C -> RIGHT CURLY BRACKET + u'`' # 0x8D -> GRAVE ACCENT + u'\xa6' # 0x8E -> BROKEN BAR + u'\xb1' # 0x8F -> PLUS-MINUS SIGN + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE 
AE + u'\xb8' # 0x9D -> CEDILLA + u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'\xf6' # 0xA1 -> LATIN SMALL LETTER O WITH DIAERESIS + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u']' # 0xAC -> RIGHT SQUARE BRACKET + u'$' # 0xAD -> DOLLAR SIGN + u'@' # 0xAE -> COMMERCIAL AT + u'\xae' # 0xAF -> REGISTERED SIGN + u'\xa2' # 0xB0 -> CENT SIGN + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'\xac' # 0xBA -> NOT SIGN + u'|' # 0xBB -> VERTICAL LINE + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'\xe7' # 0xC0 -> LATIN SMALL LETTER C WITH CEDILLA + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'~' # 0xCC -> TILDE + u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE + u'\u011f' # 0xD0 -> LATIN SMALL 
LETTER G WITH BREVE + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\\' # 0xDC -> REVERSE SOLIDUS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xfc' # 0xE0 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'#' # 0xEC -> NUMBER SIGN + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'"' # 0xFC -> QUOTATION MARK + u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF 
HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x3F, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x4F, # EXCLAMATION MARK - 0x0022: 0xFC, # QUOTATION MARK - 0x0023: 0xEC, # NUMBER SIGN - 0x0024: 0xAD, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, # QUESTION MARK - 0x0040: 0xAE, # COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL 
LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0x68, # LEFT SQUARE BRACKET - 0x005C: 0xDC, # REVERSE SOLIDUS - 0x005D: 0xAC, # RIGHT SQUARE BRACKET - 0x005E: 0x5F, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x8D, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # 
LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0x48, # LEFT CURLY BRACKET - 0x007C: 0xBB, # VERTICAL LINE - 0x007D: 0x8C, # RIGHT CURLY BRACKET - 0x007E: 0xCC, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # CONTROL - 0x0081: 0x21, # CONTROL - 0x0082: 0x22, # CONTROL - 0x0083: 0x23, # CONTROL - 0x0084: 0x24, # CONTROL - 0x0085: 0x15, # CONTROL - 0x0086: 0x06, # CONTROL - 0x0087: 0x17, # CONTROL - 0x0088: 0x28, # CONTROL - 0x0089: 0x29, # CONTROL - 0x008A: 0x2A, # CONTROL - 0x008B: 0x2B, # CONTROL - 0x008C: 0x2C, # CONTROL - 0x008D: 0x09, # CONTROL - 0x008E: 0x0A, # CONTROL - 0x008F: 0x1B, # CONTROL - 0x0090: 0x30, # CONTROL - 0x0091: 0x31, # CONTROL - 0x0092: 0x1A, # CONTROL - 0x0093: 0x33, # CONTROL - 0x0094: 0x34, # CONTROL - 0x0095: 0x35, # CONTROL - 0x0096: 0x36, # CONTROL - 0x0097: 0x08, # CONTROL - 0x0098: 0x38, # CONTROL - 0x0099: 0x39, # CONTROL - 0x009A: 0x3A, # CONTROL - 0x009B: 0x3B, # CONTROL - 0x009C: 0x04, # CONTROL - 0x009D: 0x14, # CONTROL - 0x009E: 0x3E, # CONTROL - 0x009F: 0xFF, # CONTROL - 0x00A0: 0x41, # NO-BREAK SPACE - 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK - 0x00A2: 0xB0, # CENT SIGN - 0x00A3: 0xB1, # POUND SIGN - 0x00A4: 0x9F, # CURRENCY SIGN - 0x00A5: 0xB2, # YEN SIGN - 0x00A6: 0x8E, # BROKEN BAR - 0x00A7: 0xB5, # SECTION SIGN - 0x00A8: 0xBD, # DIAERESIS - 0x00A9: 0xB4, # COPYRIGHT SIGN - 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR - 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xBA, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00AE: 0xAF, # REGISTERED SIGN - 0x00AF: 0xBC, # MACRON - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0x8F, # PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 0xBE, # ACUTE ACCENT - 0x00B5: 0xA0, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW 
SIGN - 0x00B7: 0xB3, # MIDDLE DOT - 0x00B8: 0x9D, # CEDILLA - 0x00B9: 0xDA, # SUPERSCRIPT ONE - 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF - 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xAB, # INVERTED QUESTION MARK - 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE - 0x00C7: 0x4A, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x7B, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xBF, # MULTIPLICATION SIGN - 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x7F, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x45, # LATIN 
SMALL LETTER A WITH ACUTE - 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE - 0x00E7: 0xC0, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xA1, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xE1, # DIVISION SIGN - 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xE0, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011E: 0x5A, # LATIN CAPITAL LETTER G WITH BREVE - 0x011F: 0xD0, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0x5B, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0x79, # LATIN SMALL LETTER DOTLESS I - 0x015E: 0x7C, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0x6A, # LATIN SMALL LETTER S WITH CEDILLA + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x37, # END OF TRANSMISSION + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, # ACKNOWLEDGE + 0x0007: 0x2F, # BELL + 0x0008: 0x16, # BACKSPACE + 0x0009: 0x05, 
# HORIZONTAL TABULATION + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x3C, # DEVICE CONTROL FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x32, # SYNCHRONOUS IDLE + 0x0017: 0x26, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x3F, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x40, # SPACE + 0x0021: 0x4F, # EXCLAMATION MARK + 0x0022: 0xFC, # QUOTATION MARK + 0x0023: 0xEC, # NUMBER SIGN + 0x0024: 0xAD, # DOLLAR SIGN + 0x0025: 0x6C, # PERCENT SIGN + 0x0026: 0x50, # AMPERSAND + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0xAE, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B + 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, 
# LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 0x0050: 0xD7, # LATIN CAPITAL LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # LATIN CAPITAL LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0x68, # LEFT SQUARE BRACKET + 0x005C: 0xDC, # REVERSE SOLIDUS + 0x005D: 0xAC, # RIGHT SQUARE BRACKET + 0x005E: 0x5F, # CIRCUMFLEX ACCENT + 0x005F: 0x6D, # LOW LINE + 0x0060: 0x8D, # GRAVE ACCENT + 0x0061: 0x81, # LATIN SMALL LETTER A + 0x0062: 0x82, # LATIN SMALL LETTER B + 0x0063: 0x83, # LATIN SMALL LETTER C + 0x0064: 0x84, # LATIN SMALL LETTER D + 0x0065: 0x85, # LATIN SMALL LETTER E + 0x0066: 0x86, # LATIN SMALL LETTER F + 0x0067: 0x87, # LATIN SMALL LETTER G + 0x0068: 0x88, # LATIN SMALL LETTER H + 0x0069: 0x89, # LATIN SMALL LETTER I + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O + 0x0070: 0x97, # LATIN SMALL LETTER P + 0x0071: 0x98, # LATIN SMALL LETTER Q + 0x0072: 0x99, # LATIN SMALL LETTER R + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, # LATIN SMALL LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, 
# LATIN SMALL LETTER Z + 0x007B: 0x48, # LEFT CURLY BRACKET + 0x007C: 0xBB, # VERTICAL LINE + 0x007D: 0x8C, # RIGHT CURLY BRACKET + 0x007E: 0xCC, # TILDE + 0x007F: 0x07, # DELETE + 0x0080: 0x20, # CONTROL + 0x0081: 0x21, # CONTROL + 0x0082: 0x22, # CONTROL + 0x0083: 0x23, # CONTROL + 0x0084: 0x24, # CONTROL + 0x0085: 0x15, # CONTROL + 0x0086: 0x06, # CONTROL + 0x0087: 0x17, # CONTROL + 0x0088: 0x28, # CONTROL + 0x0089: 0x29, # CONTROL + 0x008A: 0x2A, # CONTROL + 0x008B: 0x2B, # CONTROL + 0x008C: 0x2C, # CONTROL + 0x008D: 0x09, # CONTROL + 0x008E: 0x0A, # CONTROL + 0x008F: 0x1B, # CONTROL + 0x0090: 0x30, # CONTROL + 0x0091: 0x31, # CONTROL + 0x0092: 0x1A, # CONTROL + 0x0093: 0x33, # CONTROL + 0x0094: 0x34, # CONTROL + 0x0095: 0x35, # CONTROL + 0x0096: 0x36, # CONTROL + 0x0097: 0x08, # CONTROL + 0x0098: 0x38, # CONTROL + 0x0099: 0x39, # CONTROL + 0x009A: 0x3A, # CONTROL + 0x009B: 0x3B, # CONTROL + 0x009C: 0x04, # CONTROL + 0x009D: 0x14, # CONTROL + 0x009E: 0x3E, # CONTROL + 0x009F: 0xFF, # CONTROL + 0x00A0: 0x41, # NO-BREAK SPACE + 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK + 0x00A2: 0xB0, # CENT SIGN + 0x00A3: 0xB1, # POUND SIGN + 0x00A4: 0x9F, # CURRENCY SIGN + 0x00A5: 0xB2, # YEN SIGN + 0x00A6: 0x8E, # BROKEN BAR + 0x00A7: 0xB5, # SECTION SIGN + 0x00A8: 0xBD, # DIAERESIS + 0x00A9: 0xB4, # COPYRIGHT SIGN + 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR + 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xBA, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00AE: 0xAF, # REGISTERED SIGN + 0x00AF: 0xBC, # MACRON + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0x8F, # PLUS-MINUS SIGN + 0x00B2: 0xEA, # SUPERSCRIPT TWO + 0x00B3: 0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xBE, # ACUTE ACCENT + 0x00B5: 0xA0, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB3, # MIDDLE DOT + 0x00B8: 0x9D, # CEDILLA + 0x00B9: 0xDA, # SUPERSCRIPT ONE + 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xB7, # VULGAR 
FRACTION ONE QUARTER + 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF + 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xAB, # INVERTED QUESTION MARK + 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE + 0x00C7: 0x4A, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x7B, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xBF, # MULTIPLICATION SIGN + 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x7F, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x47, # LATIN SMALL LETTER A WITH 
RING ABOVE + 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE + 0x00E7: 0xC0, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xA1, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xE1, # DIVISION SIGN + 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xE0, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011E: 0x5A, # LATIN CAPITAL LETTER G WITH BREVE + 0x011F: 0xD0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x5B, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x79, # LATIN SMALL LETTER DOTLESS I + 0x015E: 0x7C, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0x6A, # LATIN SMALL LETTER S WITH CEDILLA } - Modified: python/branches/ssize_t/Lib/encodings/cp1140.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1140.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1140.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = 
( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' 
# 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xa2' # 0x4A -> CENT SIGN - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'|' # 0x4F -> VERTICAL LINE - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'!' 
# 0x5A -> EXCLAMATION MARK - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'\xac' # 0x5F -> NOT SIGN - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' # 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9D -> CEDILLA - u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\u20ac' # 0x9F -> EURO SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xAF -> REGISTERED SIGN - u'^' # 0xB0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xBA -> LEFT SQUARE BRACKET - u']' # 0xBB -> RIGHT SQUARE 
BRACKET - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL 
LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> CONTROL + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> CONTROL + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> CONTROL + u'\x8d' # 0x09 -> CONTROL + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> CONTROL + u'\x85' # 0x15 -> CONTROL + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> CONTROL + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> CONTROL + u'\x81' # 0x21 -> CONTROL + u'\x82' # 0x22 -> CONTROL + u'\x83' # 0x23 -> CONTROL + u'\x84' # 0x24 -> CONTROL + u'\n' # 0x25 
-> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> CONTROL + u'\x89' # 0x29 -> CONTROL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> CONTROL + u'\x91' # 0x31 -> CONTROL + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> CONTROL + u'\x94' # 0x34 -> CONTROL + u'\x95' # 0x35 -> CONTROL + u'\x96' # 0x36 -> CONTROL + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> CONTROL + u'\x99' # 0x39 -> CONTROL + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\xa0' # 0x41 -> NO-BREAK SPACE + u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE + u'\xa2' # 0x4A -> CENT SIGN + u'.' 
# 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'|' # 0x4F -> VERTICAL LINE + u'&' # 0x50 -> AMPERSAND + u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE + u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE + u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'!' # 0x5A -> EXCLAMATION MARK + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'\xac' # 0x5F -> NOT SIGN + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xa6' # 0x6A -> BROKEN BAR + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' 
# 0x6F -> QUESTION MARK + u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE + u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE + u'`' # 0x79 -> GRAVE ACCENT + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK + u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) + u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) + u'\xb1' # 0x8F -> PLUS-MINUS SIGN + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9D -> CEDILLA + 
u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\u20ac' # 0x9F -> EURO SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) + u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) + u'\xae' # 0xAF -> REGISTERED SIGN + u'^' # 0xB0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0xBA -> LEFT SQUARE BRACKET + u']' # 0xBB -> RIGHT SQUARE BRACKET + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH 
TILDE + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL ) ### 
Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x3F, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x5A, # EXCLAMATION MARK - 0x0022: 0x7F, # QUOTATION MARK - 0x0023: 0x7B, # NUMBER SIGN - 0x0024: 0x5B, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, 
# QUESTION MARK - 0x0040: 0x7C, # COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0xBA, # LEFT SQUARE BRACKET - 0x005C: 0xE0, # REVERSE SOLIDUS - 0x005D: 0xBB, # RIGHT SQUARE BRACKET - 0x005E: 0xB0, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x79, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # 
LATIN SMALL LETTER R - 0x0073: 0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0xC0, # LEFT CURLY BRACKET - 0x007C: 0x4F, # VERTICAL LINE - 0x007D: 0xD0, # RIGHT CURLY BRACKET - 0x007E: 0xA1, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # CONTROL - 0x0081: 0x21, # CONTROL - 0x0082: 0x22, # CONTROL - 0x0083: 0x23, # CONTROL - 0x0084: 0x24, # CONTROL - 0x0085: 0x15, # CONTROL - 0x0086: 0x06, # CONTROL - 0x0087: 0x17, # CONTROL - 0x0088: 0x28, # CONTROL - 0x0089: 0x29, # CONTROL - 0x008A: 0x2A, # CONTROL - 0x008B: 0x2B, # CONTROL - 0x008C: 0x2C, # CONTROL - 0x008D: 0x09, # CONTROL - 0x008E: 0x0A, # CONTROL - 0x008F: 0x1B, # CONTROL - 0x0090: 0x30, # CONTROL - 0x0091: 0x31, # CONTROL - 0x0092: 0x1A, # CONTROL - 0x0093: 0x33, # CONTROL - 0x0094: 0x34, # CONTROL - 0x0095: 0x35, # CONTROL - 0x0096: 0x36, # CONTROL - 0x0097: 0x08, # CONTROL - 0x0098: 0x38, # CONTROL - 0x0099: 0x39, # CONTROL - 0x009A: 0x3A, # CONTROL - 0x009B: 0x3B, # CONTROL - 0x009C: 0x04, # CONTROL - 0x009D: 0x14, # CONTROL - 0x009E: 0x3E, # CONTROL - 0x009F: 0xFF, # CONTROL - 0x00A0: 0x41, # NO-BREAK SPACE - 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK - 0x00A2: 0x4A, # CENT SIGN - 0x00A3: 0xB1, # POUND SIGN - 0x00A5: 0xB2, # YEN SIGN - 0x00A6: 0x6A, # BROKEN BAR - 0x00A7: 0xB5, # SECTION SIGN - 0x00A8: 0xBD, # DIAERESIS - 0x00A9: 0xB4, # COPYRIGHT SIGN - 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR - 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0x5F, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00AE: 0xAF, # REGISTERED SIGN - 0x00AF: 0xBC, # MACRON - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0x8F, # PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 0xBE, # ACUTE ACCENT - 0x00B5: 
0xA0, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB3, # MIDDLE DOT - 0x00B8: 0x9D, # CEDILLA - 0x00B9: 0xDA, # SUPERSCRIPT ONE - 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF - 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xAB, # INVERTED QUESTION MARK - 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE - 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) - 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xBF, # MULTIPLICATION SIGN - 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xAD, # LATIN CAPITAL 
LETTER Y WITH ACUTE - 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) - 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE - 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) - 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xE1, # DIVISION SIGN - 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) - 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x20AC: 0x9F, # EURO SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x37, # END OF TRANSMISSION + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, # 
ACKNOWLEDGE + 0x0007: 0x2F, # BELL + 0x0008: 0x16, # BACKSPACE + 0x0009: 0x05, # HORIZONTAL TABULATION + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x3C, # DEVICE CONTROL FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x32, # SYNCHRONOUS IDLE + 0x0017: 0x26, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x3F, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x40, # SPACE + 0x0021: 0x5A, # EXCLAMATION MARK + 0x0022: 0x7F, # QUOTATION MARK + 0x0023: 0x7B, # NUMBER SIGN + 0x0024: 0x5B, # DOLLAR SIGN + 0x0025: 0x6C, # PERCENT SIGN + 0x0026: 0x50, # AMPERSAND + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0x7C, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B + 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # 
LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, # LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 0x0050: 0xD7, # LATIN CAPITAL LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # LATIN CAPITAL LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0xBA, # LEFT SQUARE BRACKET + 0x005C: 0xE0, # REVERSE SOLIDUS + 0x005D: 0xBB, # RIGHT SQUARE BRACKET + 0x005E: 0xB0, # CIRCUMFLEX ACCENT + 0x005F: 0x6D, # LOW LINE + 0x0060: 0x79, # GRAVE ACCENT + 0x0061: 0x81, # LATIN SMALL LETTER A + 0x0062: 0x82, # LATIN SMALL LETTER B + 0x0063: 0x83, # LATIN SMALL LETTER C + 0x0064: 0x84, # LATIN SMALL LETTER D + 0x0065: 0x85, # LATIN SMALL LETTER E + 0x0066: 0x86, # LATIN SMALL LETTER F + 0x0067: 0x87, # LATIN SMALL LETTER G + 0x0068: 0x88, # LATIN SMALL LETTER H + 0x0069: 0x89, # LATIN SMALL LETTER I + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O + 0x0070: 0x97, # LATIN SMALL LETTER P + 0x0071: 0x98, # LATIN SMALL LETTER Q + 0x0072: 0x99, # LATIN SMALL LETTER R + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, # LATIN SMALL LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 
0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, # LATIN SMALL LETTER Z + 0x007B: 0xC0, # LEFT CURLY BRACKET + 0x007C: 0x4F, # VERTICAL LINE + 0x007D: 0xD0, # RIGHT CURLY BRACKET + 0x007E: 0xA1, # TILDE + 0x007F: 0x07, # DELETE + 0x0080: 0x20, # CONTROL + 0x0081: 0x21, # CONTROL + 0x0082: 0x22, # CONTROL + 0x0083: 0x23, # CONTROL + 0x0084: 0x24, # CONTROL + 0x0085: 0x15, # CONTROL + 0x0086: 0x06, # CONTROL + 0x0087: 0x17, # CONTROL + 0x0088: 0x28, # CONTROL + 0x0089: 0x29, # CONTROL + 0x008A: 0x2A, # CONTROL + 0x008B: 0x2B, # CONTROL + 0x008C: 0x2C, # CONTROL + 0x008D: 0x09, # CONTROL + 0x008E: 0x0A, # CONTROL + 0x008F: 0x1B, # CONTROL + 0x0090: 0x30, # CONTROL + 0x0091: 0x31, # CONTROL + 0x0092: 0x1A, # CONTROL + 0x0093: 0x33, # CONTROL + 0x0094: 0x34, # CONTROL + 0x0095: 0x35, # CONTROL + 0x0096: 0x36, # CONTROL + 0x0097: 0x08, # CONTROL + 0x0098: 0x38, # CONTROL + 0x0099: 0x39, # CONTROL + 0x009A: 0x3A, # CONTROL + 0x009B: 0x3B, # CONTROL + 0x009C: 0x04, # CONTROL + 0x009D: 0x14, # CONTROL + 0x009E: 0x3E, # CONTROL + 0x009F: 0xFF, # CONTROL + 0x00A0: 0x41, # NO-BREAK SPACE + 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK + 0x00A2: 0x4A, # CENT SIGN + 0x00A3: 0xB1, # POUND SIGN + 0x00A5: 0xB2, # YEN SIGN + 0x00A6: 0x6A, # BROKEN BAR + 0x00A7: 0xB5, # SECTION SIGN + 0x00A8: 0xBD, # DIAERESIS + 0x00A9: 0xB4, # COPYRIGHT SIGN + 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR + 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0x5F, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00AE: 0xAF, # REGISTERED SIGN + 0x00AF: 0xBC, # MACRON + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0x8F, # PLUS-MINUS SIGN + 0x00B2: 0xEA, # SUPERSCRIPT TWO + 0x00B3: 0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xBE, # ACUTE ACCENT + 0x00B5: 0xA0, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB3, # MIDDLE DOT + 0x00B8: 0x9D, # CEDILLA + 0x00B9: 0xDA, # SUPERSCRIPT ONE + 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE 
ANGLE QUOTATION MARK + 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF + 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xAB, # INVERTED QUESTION MARK + 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE + 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) + 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xBF, # MULTIPLICATION SIGN + 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) + 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x45, # LATIN SMALL LETTER A WITH 
ACUTE + 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE + 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xE1, # DIVISION SIGN + 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x20AC: 0x9F, # EURO SIGN } - Modified: python/branches/ssize_t/Lib/encodings/cp1250.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1250.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1250.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class 
StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,517 +32,516 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\ufffe' # 0x83 -> UNDEFINED - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\ufffe' # 0x88 -> UNDEFINED - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u015a' # 0x8C -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u0164' # 0x8D -> LATIN CAPITAL LETTER T WITH CARON - u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON - u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 
0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u015b' # 0x9C -> LATIN SMALL LETTER S WITH ACUTE - u'\u0165' # 0x9D -> LATIN SMALL LETTER T WITH CARON - u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON - u'\u017a' # 0x9F -> LATIN SMALL LETTER Z WITH ACUTE - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u02c7' # 0xA1 -> CARON - u'\u02d8' # 0xA2 -> BREVE - u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u0104' # 0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u02db' # 0xB2 -> OGONEK - u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\u0105' # 0xB9 -> LATIN SMALL LETTER A WITH OGONEK - u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u013d' # 0xBC -> LATIN CAPITAL LETTER L WITH CARON - u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT - u'\u013e' # 0xBE -> LATIN SMALL LETTER L WITH CARON - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0139' # 0xC5 
-> LATIN CAPITAL LETTER L WITH ACUTE - u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON - u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE - u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 
-> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON - u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA - u'\u02d9' # 0xFF -> DOT ABOVE + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + 
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\ufffe' # 0x83 -> UNDEFINED + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\ufffe' # 0x88 -> UNDEFINED + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u015a' # 0x8C -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u0164' # 0x8D -> LATIN CAPITAL LETTER T WITH CARON + u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON + u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\ufffe' # 0x98 -> UNDEFINED + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u015b' # 0x9C -> LATIN SMALL LETTER S WITH ACUTE + u'\u0165' # 0x9D -> LATIN SMALL LETTER T WITH CARON + u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON + u'\u017a' # 0x9F -> LATIN SMALL LETTER Z WITH ACUTE + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u02c7' # 0xA1 -> CARON + u'\u02d8' # 0xA2 -> BREVE + u'\u0141' # 0xA3 -> LATIN 
CAPITAL LETTER L WITH STROKE + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u0104' # 0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u02db' # 0xB2 -> OGONEK + u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\u0105' # 0xB9 -> LATIN SMALL LETTER A WITH OGONEK + u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u013d' # 0xBC -> LATIN CAPITAL LETTER L WITH CARON + u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT + u'\u013e' # 0xBE -> LATIN SMALL LETTER L WITH CARON + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH 
ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON + u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE + u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + 
u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON + u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA + u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 
0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # 
CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C4: 0xC4, # LATIN CAPITAL 
LETTER A WITH DIAERESIS - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0xA5, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xB9, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x010E: 0xCF, # LATIN CAPITAL LETTER D WITH CARON - 0x010F: 0xEF, # LATIN SMALL LETTER D WITH CARON - 0x0110: 0xD0, # 
LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE - 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK - 0x011A: 0xCC, # LATIN CAPITAL LETTER E WITH CARON - 0x011B: 0xEC, # LATIN SMALL LETTER E WITH CARON - 0x0139: 0xC5, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013A: 0xE5, # LATIN SMALL LETTER L WITH ACUTE - 0x013D: 0xBC, # LATIN CAPITAL LETTER L WITH CARON - 0x013E: 0xBE, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE - 0x0147: 0xD2, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0xF2, # LATIN SMALL LETTER N WITH CARON - 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0xC0, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0xE0, # LATIN SMALL LETTER R WITH ACUTE - 0x0158: 0xD8, # LATIN CAPITAL LETTER R WITH CARON - 0x0159: 0xF8, # LATIN SMALL LETTER R WITH CARON - 0x015A: 0x8C, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0x9C, # LATIN SMALL LETTER S WITH ACUTE - 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON - 0x0162: 0xDE, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x0163: 0xFE, # LATIN SMALL LETTER T WITH CEDILLA - 0x0164: 0x8D, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0x9D, # LATIN SMALL LETTER T WITH CARON - 0x016E: 0xD9, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x016F: 0xF9, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0xDB, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xFB, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0179: 0x8F, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017A: 0x9F, # LATIN SMALL LETTER Z WITH ACUTE - 0x017B: 0xAF, # LATIN 
CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0x8E, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0x9E, # LATIN SMALL LETTER Z WITH CARON - 0x02C7: 0xA1, # CARON - 0x02D8: 0xA2, # BREVE - 0x02D9: 0xFF, # DOT ABOVE - 0x02DB: 0xB2, # OGONEK - 0x02DD: 0xBD, # DOUBLE ACUTE ACCENT - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 
0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 
0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # 
CEDILLA + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0xA5, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xB9, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 
0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x010E: 0xCF, # LATIN CAPITAL LETTER D WITH CARON + 0x010F: 0xEF, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK + 0x011A: 0xCC, # LATIN CAPITAL LETTER E WITH CARON + 0x011B: 0xEC, # LATIN SMALL LETTER E WITH CARON + 0x0139: 0xC5, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013A: 0xE5, # LATIN SMALL LETTER L WITH ACUTE + 0x013D: 0xBC, # LATIN CAPITAL LETTER L WITH CARON + 0x013E: 0xBE, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0xD2, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0xF2, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0xC0, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0xE0, # LATIN SMALL LETTER R WITH ACUTE + 0x0158: 0xD8, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0xF8, # LATIN SMALL LETTER R WITH CARON + 0x015A: 0x8C, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0x9C, # LATIN SMALL LETTER S WITH ACUTE + 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0xDE, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0xFE, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0x8D, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0x9D, # LATIN SMALL LETTER T WITH CARON + 0x016E: 0xD9, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016F: 0xF9, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0xDB, # LATIN CAPITAL 
LETTER U WITH DOUBLE ACUTE + 0x0171: 0xFB, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0x8F, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0x9F, # LATIN SMALL LETTER Z WITH ACUTE + 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0x8E, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0x9E, # LATIN SMALL LETTER Z WITH CARON + 0x02C7: 0xA1, # CARON + 0x02D8: 0xA2, # BREVE + 0x02D9: 0xFF, # DOT ABOVE + 0x02DB: 0xB2, # OGONEK + 0x02DD: 0xBD, # DOUBLE ACUTE ACCENT + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } - Modified: python/branches/ssize_t/Lib/encodings/cp1251.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1251.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1251.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,521 +32,520 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - 
u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u0402' # 0x80 -> CYRILLIC CAPITAL LETTER DJE - u'\u0403' # 0x81 -> CYRILLIC CAPITAL LETTER GJE - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0453' # 0x83 -> CYRILLIC SMALL LETTER GJE - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u20ac' # 0x88 -> EURO SIGN - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0409' # 0x8A -> CYRILLIC CAPITAL LETTER LJE - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u040a' # 0x8C -> CYRILLIC CAPITAL LETTER NJE - u'\u040c' # 0x8D -> CYRILLIC CAPITAL LETTER KJE - u'\u040b' # 0x8E -> CYRILLIC CAPITAL LETTER TSHE - u'\u040f' # 0x8F -> CYRILLIC CAPITAL LETTER DZHE - u'\u0452' # 0x90 -> CYRILLIC SMALL LETTER DJE - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0459' # 0x9A -> CYRILLIC SMALL LETTER LJE - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u045a' # 0x9C -> CYRILLIC SMALL LETTER NJE - u'\u045c' # 0x9D -> CYRILLIC SMALL LETTER KJE - u'\u045b' # 0x9E -> CYRILLIC SMALL LETTER TSHE - u'\u045f' # 0x9F -> CYRILLIC SMALL LETTER DZHE - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u040e' # 0xA1 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045e' # 
0xA2 -> CYRILLIC SMALL LETTER SHORT U - u'\u0408' # 0xA3 -> CYRILLIC CAPITAL LETTER JE - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u0490' # 0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0404' # 0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u0407' # 0xAF -> CYRILLIC CAPITAL LETTER YI - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0491' # 0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO - u'\u2116' # 0xB9 -> NUMERO SIGN - u'\u0454' # 0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0458' # 0xBC -> CYRILLIC SMALL LETTER JE - u'\u0405' # 0xBD -> CYRILLIC CAPITAL LETTER DZE - u'\u0455' # 0xBE -> CYRILLIC SMALL LETTER DZE - u'\u0457' # 0xBF -> CYRILLIC SMALL LETTER YI - u'\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0xC3 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM - 
u'\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA - u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE - u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 
0xF6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU - u'\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u0402' # 0x80 -> CYRILLIC CAPITAL LETTER DJE + u'\u0403' # 0x81 -> CYRILLIC CAPITAL LETTER GJE + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0453' # 0x83 -> CYRILLIC SMALL LETTER GJE + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u20ac' # 0x88 -> EURO SIGN + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\u0409' # 0x8A -> CYRILLIC CAPITAL LETTER LJE + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u040a' # 0x8C -> CYRILLIC CAPITAL LETTER NJE + u'\u040c' # 0x8D -> CYRILLIC CAPITAL LETTER KJE + u'\u040b' # 0x8E -> CYRILLIC CAPITAL LETTER TSHE + u'\u040f' # 0x8F -> CYRILLIC CAPITAL LETTER DZHE + u'\u0452' # 0x90 -> CYRILLIC SMALL LETTER DJE + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + 
u'\ufffe' # 0x98 -> UNDEFINED + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\u0459' # 0x9A -> CYRILLIC SMALL LETTER LJE + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u045a' # 0x9C -> CYRILLIC SMALL LETTER NJE + u'\u045c' # 0x9D -> CYRILLIC SMALL LETTER KJE + u'\u045b' # 0x9E -> CYRILLIC SMALL LETTER TSHE + u'\u045f' # 0x9F -> CYRILLIC SMALL LETTER DZHE + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u040e' # 0xA1 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0xA2 -> CYRILLIC SMALL LETTER SHORT U + u'\u0408' # 0xA3 -> CYRILLIC CAPITAL LETTER JE + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u0490' # 0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u0404' # 0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u0407' # 0xAF -> CYRILLIC CAPITAL LETTER YI + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0491' # 0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO + u'\u2116' # 0xB9 -> NUMERO SIGN + u'\u0454' # 0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0458' # 0xBC -> CYRILLIC SMALL LETTER JE + u'\u0405' # 0xBD -> CYRILLIC CAPITAL LETTER DZE + u'\u0455' # 0xBE -> CYRILLIC SMALL LETTER DZE + u'\u0457' # 0xBF -> CYRILLIC SMALL LETTER YI + u'\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0xC3 -> 
CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL + 
u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E + u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 
0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 
0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x0401: 0xA8, # 
CYRILLIC CAPITAL LETTER IO - 0x0402: 0x80, # CYRILLIC CAPITAL LETTER DJE - 0x0403: 0x81, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0xAA, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0405: 0xBD, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0xB2, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xAF, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0xA3, # CYRILLIC CAPITAL LETTER JE - 0x0409: 0x8A, # CYRILLIC CAPITAL LETTER LJE - 0x040A: 0x8C, # CYRILLIC CAPITAL LETTER NJE - 0x040B: 0x8E, # CYRILLIC CAPITAL LETTER TSHE - 0x040C: 0x8D, # CYRILLIC CAPITAL LETTER KJE - 0x040E: 0xA1, # CYRILLIC CAPITAL LETTER SHORT U - 0x040F: 0x8F, # CYRILLIC CAPITAL LETTER DZHE - 0x0410: 0xC0, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xC1, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0xC2, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xC3, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xC4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xC5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xC6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xC7, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xC8, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xC9, # CYRILLIC CAPITAL LETTER SHORT I - 0x041A: 0xCA, # CYRILLIC CAPITAL LETTER KA - 0x041B: 0xCB, # CYRILLIC CAPITAL LETTER EL - 0x041C: 0xCC, # CYRILLIC CAPITAL LETTER EM - 0x041D: 0xCD, # CYRILLIC CAPITAL LETTER EN - 0x041E: 0xCE, # CYRILLIC CAPITAL LETTER O - 0x041F: 0xCF, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xD0, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xD1, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xD2, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xD3, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xD4, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xD5, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xD6, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xD7, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xD8, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xD9, # CYRILLIC CAPITAL LETTER SHCHA - 0x042A: 0xDA, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042B: 0xDB, # CYRILLIC CAPITAL LETTER YERU - 0x042C: 0xDC, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042D: 
0xDD, # CYRILLIC CAPITAL LETTER E - 0x042E: 0xDE, # CYRILLIC CAPITAL LETTER YU - 0x042F: 0xDF, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xE0, # CYRILLIC SMALL LETTER A - 0x0431: 0xE1, # CYRILLIC SMALL LETTER BE - 0x0432: 0xE2, # CYRILLIC SMALL LETTER VE - 0x0433: 0xE3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xE4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xE5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xE6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xE7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xE8, # CYRILLIC SMALL LETTER I - 0x0439: 0xE9, # CYRILLIC SMALL LETTER SHORT I - 0x043A: 0xEA, # CYRILLIC SMALL LETTER KA - 0x043B: 0xEB, # CYRILLIC SMALL LETTER EL - 0x043C: 0xEC, # CYRILLIC SMALL LETTER EM - 0x043D: 0xED, # CYRILLIC SMALL LETTER EN - 0x043E: 0xEE, # CYRILLIC SMALL LETTER O - 0x043F: 0xEF, # CYRILLIC SMALL LETTER PE - 0x0440: 0xF0, # CYRILLIC SMALL LETTER ER - 0x0441: 0xF1, # CYRILLIC SMALL LETTER ES - 0x0442: 0xF2, # CYRILLIC SMALL LETTER TE - 0x0443: 0xF3, # CYRILLIC SMALL LETTER U - 0x0444: 0xF4, # CYRILLIC SMALL LETTER EF - 0x0445: 0xF5, # CYRILLIC SMALL LETTER HA - 0x0446: 0xF6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xF7, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xF8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xF9, # CYRILLIC SMALL LETTER SHCHA - 0x044A: 0xFA, # CYRILLIC SMALL LETTER HARD SIGN - 0x044B: 0xFB, # CYRILLIC SMALL LETTER YERU - 0x044C: 0xFC, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044D: 0xFD, # CYRILLIC SMALL LETTER E - 0x044E: 0xFE, # CYRILLIC SMALL LETTER YU - 0x044F: 0xFF, # CYRILLIC SMALL LETTER YA - 0x0451: 0xB8, # CYRILLIC SMALL LETTER IO - 0x0452: 0x90, # CYRILLIC SMALL LETTER DJE - 0x0453: 0x83, # CYRILLIC SMALL LETTER GJE - 0x0454: 0xBA, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0xBE, # CYRILLIC SMALL LETTER DZE - 0x0456: 0xB3, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xBF, # CYRILLIC SMALL LETTER YI - 0x0458: 0xBC, # CYRILLIC SMALL LETTER JE - 0x0459: 0x9A, # CYRILLIC SMALL LETTER LJE - 0x045A: 0x9C, # CYRILLIC SMALL LETTER NJE - 
0x045B: 0x9E, # CYRILLIC SMALL LETTER TSHE - 0x045C: 0x9D, # CYRILLIC SMALL LETTER KJE - 0x045E: 0xA2, # CYRILLIC SMALL LETTER SHORT U - 0x045F: 0x9F, # CYRILLIC SMALL LETTER DZHE - 0x0490: 0xA5, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN - 0x0491: 0xB4, # CYRILLIC SMALL LETTER GHE WITH UPTURN - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x88, # EURO SIGN - 0x2116: 0xB9, # NUMERO SIGN - 0x2122: 0x99, # TRADE MARK SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 
0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 
0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x0401: 0xA8, # 
CYRILLIC CAPITAL LETTER IO + 0x0402: 0x80, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0x81, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0xAA, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0xBD, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0xB2, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0xAF, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0xA3, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0x8A, # CYRILLIC CAPITAL LETTER LJE + 0x040A: 0x8C, # CYRILLIC CAPITAL LETTER NJE + 0x040B: 0x8E, # CYRILLIC CAPITAL LETTER TSHE + 0x040C: 0x8D, # CYRILLIC CAPITAL LETTER KJE + 0x040E: 0xA1, # CYRILLIC CAPITAL LETTER SHORT U + 0x040F: 0x8F, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0xC0, # CYRILLIC CAPITAL LETTER A + 0x0411: 0xC1, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0xC2, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0xC3, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0xC4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0xC5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0xC6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0xC7, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0xC8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0xC9, # CYRILLIC CAPITAL LETTER SHORT I + 0x041A: 0xCA, # CYRILLIC CAPITAL LETTER KA + 0x041B: 0xCB, # CYRILLIC CAPITAL LETTER EL + 0x041C: 0xCC, # CYRILLIC CAPITAL LETTER EM + 0x041D: 0xCD, # CYRILLIC CAPITAL LETTER EN + 0x041E: 0xCE, # CYRILLIC CAPITAL LETTER O + 0x041F: 0xCF, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0xD0, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0xD1, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0xD2, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0xD3, # CYRILLIC CAPITAL LETTER U + 0x0424: 0xD4, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0xD5, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0xD6, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0xD7, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0xD8, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0xD9, # CYRILLIC CAPITAL LETTER SHCHA + 0x042A: 0xDA, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042B: 0xDB, # CYRILLIC CAPITAL LETTER YERU + 0x042C: 0xDC, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042D: 
0xDD, # CYRILLIC CAPITAL LETTER E + 0x042E: 0xDE, # CYRILLIC CAPITAL LETTER YU + 0x042F: 0xDF, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0xE0, # CYRILLIC SMALL LETTER A + 0x0431: 0xE1, # CYRILLIC SMALL LETTER BE + 0x0432: 0xE2, # CYRILLIC SMALL LETTER VE + 0x0433: 0xE3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0xE4, # CYRILLIC SMALL LETTER DE + 0x0435: 0xE5, # CYRILLIC SMALL LETTER IE + 0x0436: 0xE6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0xE7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0xE8, # CYRILLIC SMALL LETTER I + 0x0439: 0xE9, # CYRILLIC SMALL LETTER SHORT I + 0x043A: 0xEA, # CYRILLIC SMALL LETTER KA + 0x043B: 0xEB, # CYRILLIC SMALL LETTER EL + 0x043C: 0xEC, # CYRILLIC SMALL LETTER EM + 0x043D: 0xED, # CYRILLIC SMALL LETTER EN + 0x043E: 0xEE, # CYRILLIC SMALL LETTER O + 0x043F: 0xEF, # CYRILLIC SMALL LETTER PE + 0x0440: 0xF0, # CYRILLIC SMALL LETTER ER + 0x0441: 0xF1, # CYRILLIC SMALL LETTER ES + 0x0442: 0xF2, # CYRILLIC SMALL LETTER TE + 0x0443: 0xF3, # CYRILLIC SMALL LETTER U + 0x0444: 0xF4, # CYRILLIC SMALL LETTER EF + 0x0445: 0xF5, # CYRILLIC SMALL LETTER HA + 0x0446: 0xF6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0xF7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0xF8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0xF9, # CYRILLIC SMALL LETTER SHCHA + 0x044A: 0xFA, # CYRILLIC SMALL LETTER HARD SIGN + 0x044B: 0xFB, # CYRILLIC SMALL LETTER YERU + 0x044C: 0xFC, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044D: 0xFD, # CYRILLIC SMALL LETTER E + 0x044E: 0xFE, # CYRILLIC SMALL LETTER YU + 0x044F: 0xFF, # CYRILLIC SMALL LETTER YA + 0x0451: 0xB8, # CYRILLIC SMALL LETTER IO + 0x0452: 0x90, # CYRILLIC SMALL LETTER DJE + 0x0453: 0x83, # CYRILLIC SMALL LETTER GJE + 0x0454: 0xBA, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0xBE, # CYRILLIC SMALL LETTER DZE + 0x0456: 0xB3, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0xBF, # CYRILLIC SMALL LETTER YI + 0x0458: 0xBC, # CYRILLIC SMALL LETTER JE + 0x0459: 0x9A, # CYRILLIC SMALL LETTER LJE + 0x045A: 0x9C, # CYRILLIC SMALL LETTER NJE + 
0x045B: 0x9E, # CYRILLIC SMALL LETTER TSHE + 0x045C: 0x9D, # CYRILLIC SMALL LETTER KJE + 0x045E: 0xA2, # CYRILLIC SMALL LETTER SHORT U + 0x045F: 0x9F, # CYRILLIC SMALL LETTER DZHE + 0x0490: 0xA5, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x0491: 0xB4, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x88, # EURO SIGN + 0x2116: 0xB9, # NUMERO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } - Modified: python/branches/ssize_t/Lib/encodings/cp1252.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1252.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1252.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,517 +32,516 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F 
-> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8D -> UNDEFINED - u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9D -> UNDEFINED - u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON - u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' 
# 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH - u'\xd1' # 0xD1 -> 
LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH 
DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x8D -> UNDEFINED + u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\u02dc' # 0x98 -> SMALL TILDE + u'\u2122' # 0x99 -> 
TRADE MARK SIGN + u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x9D -> UNDEFINED + u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON + u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL 
LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH 
ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # 
UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # 
LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE 
SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 
0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0xF0, # LATIN SMALL LETTER ETH - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0xFE, # LATIN SMALL LETTER THORN - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9C, # LATIN SMALL LIGATURE OE - 0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON - 
0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON - 0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x017D: 0x8E, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0x9E, # LATIN SMALL LETTER Z WITH CARON - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02DC: 0x98, # SMALL TILDE - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD 
SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL 
LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # 
MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN 
CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0xF0, # LATIN SMALL LETTER ETH + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0xFE, # LATIN SMALL LETTER THORN + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x9C, # LATIN SMALL LIGATURE OE + 0x0160: 0x8A, # 
LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x017D: 0x8E, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0x9E, # LATIN SMALL LETTER Z WITH CARON + 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02DC: 0x98, # SMALL TILDE + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } - Modified: python/branches/ssize_t/Lib/encodings/cp1253.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1253.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1253.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,505 +32,504 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE 
RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\ufffe' # 0x88 -> UNDEFINED - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9C -> UNDEFINED - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0385' # 0xA1 -> GREEK DIALYTIKA TONOS - u'\u0386' # 0xA2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS 
- u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\ufffe' # 0xAA -> UNDEFINED - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u2015' # 0xAF -> HORIZONTAL BAR - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u0384' # 0xB4 -> GREEK TONOS - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU - u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU - u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI - u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO - u'\ufffe' # 0xD2 -> UNDEFINED - u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0xD4 -> 
GREEK CAPITAL LETTER TAU - u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA - u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA - u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA - u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU - u'\u03bd' # 0xED -> GREEK SMALL LETTER NU - u'\u03be' # 0xEE -> GREEK SMALL LETTER XI - u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI - u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO - u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA - u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI - u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA - u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03cc' # 
0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\ufffe' # 0xFF -> UNDEFINED + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\ufffe' # 0x88 -> UNDEFINED + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\ufffe' # 0x98 -> UNDEFINED + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING 
ANGLE QUOTATION MARK + u'\ufffe' # 0x9C -> UNDEFINED + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0385' # 0xA1 -> GREEK DIALYTIKA TONOS + u'\u0386' # 0xA2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\ufffe' # 0xAA -> UNDEFINED + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u2015' # 0xAF -> HORIZONTAL BAR + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u0384' # 0xB4 -> GREEK TONOS + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA + 
u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU + u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU + u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI + u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO + u'\ufffe' # 0xD2 -> UNDEFINED + u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA + u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU + u'\u03bd' # 0xED -> GREEK SMALL LETTER NU + u'\u03be' # 0xEE -> GREEK SMALL LETTER XI + u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI + u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO + u'\u03c2' # 0xF2 -> GREEK SMALL 
LETTER FINAL SIGMA + u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU + u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI + u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI + u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA + u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\ufffe' # 0xFF -> UNDEFINED ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 
0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # 
GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x0384: 0xB4, # GREEK TONOS - 0x0385: 0xA1, # 
GREEK DIALYTIKA TONOS - 0x0386: 0xA2, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0xB8, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0xB9, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038A: 0xBA, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038C: 0xBC, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038E: 0xBE, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038F: 0xBF, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xC0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0xC1, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0xC2, # GREEK CAPITAL LETTER BETA - 0x0393: 0xC3, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0xC4, # GREEK CAPITAL LETTER DELTA - 0x0395: 0xC5, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0xC6, # GREEK CAPITAL LETTER ZETA - 0x0397: 0xC7, # GREEK CAPITAL LETTER ETA - 0x0398: 0xC8, # GREEK CAPITAL LETTER THETA - 0x0399: 0xC9, # GREEK CAPITAL LETTER IOTA - 0x039A: 0xCA, # GREEK CAPITAL LETTER KAPPA - 0x039B: 0xCB, # GREEK CAPITAL LETTER LAMDA - 0x039C: 0xCC, # GREEK CAPITAL LETTER MU - 0x039D: 0xCD, # GREEK CAPITAL LETTER NU - 0x039E: 0xCE, # GREEK CAPITAL LETTER XI - 0x039F: 0xCF, # GREEK CAPITAL LETTER OMICRON - 0x03A0: 0xD0, # GREEK CAPITAL LETTER PI - 0x03A1: 0xD1, # GREEK CAPITAL LETTER RHO - 0x03A3: 0xD3, # GREEK CAPITAL LETTER SIGMA - 0x03A4: 0xD4, # GREEK CAPITAL LETTER TAU - 0x03A5: 0xD5, # GREEK CAPITAL LETTER UPSILON - 0x03A6: 0xD6, # GREEK CAPITAL LETTER PHI - 0x03A7: 0xD7, # GREEK CAPITAL LETTER CHI - 0x03A8: 0xD8, # GREEK CAPITAL LETTER PSI - 0x03A9: 0xD9, # GREEK CAPITAL LETTER OMEGA - 0x03AA: 0xDA, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03AB: 0xDB, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03AC: 0xDC, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03AD: 0xDD, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03AE: 0xDE, # GREEK SMALL LETTER ETA WITH TONOS - 0x03AF: 0xDF, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03B0: 0xE0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA - 
0x03B2: 0xE2, # GREEK SMALL LETTER BETA - 0x03B3: 0xE3, # GREEK SMALL LETTER GAMMA - 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA - 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON - 0x03B6: 0xE6, # GREEK SMALL LETTER ZETA - 0x03B7: 0xE7, # GREEK SMALL LETTER ETA - 0x03B8: 0xE8, # GREEK SMALL LETTER THETA - 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA - 0x03BA: 0xEA, # GREEK SMALL LETTER KAPPA - 0x03BB: 0xEB, # GREEK SMALL LETTER LAMDA - 0x03BC: 0xEC, # GREEK SMALL LETTER MU - 0x03BD: 0xED, # GREEK SMALL LETTER NU - 0x03BE: 0xEE, # GREEK SMALL LETTER XI - 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON - 0x03C0: 0xF0, # GREEK SMALL LETTER PI - 0x03C1: 0xF1, # GREEK SMALL LETTER RHO - 0x03C2: 0xF2, # GREEK SMALL LETTER FINAL SIGMA - 0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA - 0x03C4: 0xF4, # GREEK SMALL LETTER TAU - 0x03C5: 0xF5, # GREEK SMALL LETTER UPSILON - 0x03C6: 0xF6, # GREEK SMALL LETTER PHI - 0x03C7: 0xF7, # GREEK SMALL LETTER CHI - 0x03C8: 0xF8, # GREEK SMALL LETTER PSI - 0x03C9: 0xF9, # GREEK SMALL LETTER OMEGA - 0x03CA: 0xFA, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03CB: 0xFB, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03CC: 0xFC, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03CD: 0xFD, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03CE: 0xFE, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2015: 0xAF, # HORIZONTAL BAR - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN + 
0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, 
# COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 
0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK + 0x0384: 0xB4, # GREEK TONOS + 0x0385: 0xA1, # GREEK DIALYTIKA TONOS + 0x0386: 0xA2, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0xB8, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0xB9, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038A: 0xBA, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038C: 0xBC, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038E: 0xBE, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038F: 0xBF, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0xC0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0xC1, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0xC2, # GREEK CAPITAL LETTER BETA + 0x0393: 0xC3, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0xC4, # GREEK CAPITAL LETTER DELTA + 0x0395: 0xC5, # GREEK CAPITAL LETTER EPSILON + 0x0396: 
0xC6, # GREEK CAPITAL LETTER ZETA + 0x0397: 0xC7, # GREEK CAPITAL LETTER ETA + 0x0398: 0xC8, # GREEK CAPITAL LETTER THETA + 0x0399: 0xC9, # GREEK CAPITAL LETTER IOTA + 0x039A: 0xCA, # GREEK CAPITAL LETTER KAPPA + 0x039B: 0xCB, # GREEK CAPITAL LETTER LAMDA + 0x039C: 0xCC, # GREEK CAPITAL LETTER MU + 0x039D: 0xCD, # GREEK CAPITAL LETTER NU + 0x039E: 0xCE, # GREEK CAPITAL LETTER XI + 0x039F: 0xCF, # GREEK CAPITAL LETTER OMICRON + 0x03A0: 0xD0, # GREEK CAPITAL LETTER PI + 0x03A1: 0xD1, # GREEK CAPITAL LETTER RHO + 0x03A3: 0xD3, # GREEK CAPITAL LETTER SIGMA + 0x03A4: 0xD4, # GREEK CAPITAL LETTER TAU + 0x03A5: 0xD5, # GREEK CAPITAL LETTER UPSILON + 0x03A6: 0xD6, # GREEK CAPITAL LETTER PHI + 0x03A7: 0xD7, # GREEK CAPITAL LETTER CHI + 0x03A8: 0xD8, # GREEK CAPITAL LETTER PSI + 0x03A9: 0xD9, # GREEK CAPITAL LETTER OMEGA + 0x03AA: 0xDA, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03AB: 0xDB, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03AC: 0xDC, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03AD: 0xDD, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03AE: 0xDE, # GREEK SMALL LETTER ETA WITH TONOS + 0x03AF: 0xDF, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03B0: 0xE0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA + 0x03B2: 0xE2, # GREEK SMALL LETTER BETA + 0x03B3: 0xE3, # GREEK SMALL LETTER GAMMA + 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA + 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON + 0x03B6: 0xE6, # GREEK SMALL LETTER ZETA + 0x03B7: 0xE7, # GREEK SMALL LETTER ETA + 0x03B8: 0xE8, # GREEK SMALL LETTER THETA + 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA + 0x03BA: 0xEA, # GREEK SMALL LETTER KAPPA + 0x03BB: 0xEB, # GREEK SMALL LETTER LAMDA + 0x03BC: 0xEC, # GREEK SMALL LETTER MU + 0x03BD: 0xED, # GREEK SMALL LETTER NU + 0x03BE: 0xEE, # GREEK SMALL LETTER XI + 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON + 0x03C0: 0xF0, # GREEK SMALL LETTER PI + 0x03C1: 0xF1, # GREEK SMALL LETTER RHO + 0x03C2: 0xF2, # GREEK SMALL LETTER FINAL SIGMA + 
0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA + 0x03C4: 0xF4, # GREEK SMALL LETTER TAU + 0x03C5: 0xF5, # GREEK SMALL LETTER UPSILON + 0x03C6: 0xF6, # GREEK SMALL LETTER PHI + 0x03C7: 0xF7, # GREEK SMALL LETTER CHI + 0x03C8: 0xF8, # GREEK SMALL LETTER PSI + 0x03C9: 0xF9, # GREEK SMALL LETTER OMEGA + 0x03CA: 0xFA, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03CB: 0xFB, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03CC: 0xFC, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03CD: 0xFD, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03CE: 0xFE, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2015: 0xAF, # HORIZONTAL BAR + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } - Modified: python/branches/ssize_t/Lib/encodings/cp1254.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1254.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1254.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,515 +32,514 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> 
ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> 
YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH 
TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH 
DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\u02dc' # 0x98 -> SMALL TILDE + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\u0161' 
# 0x9A -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN 
CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 
0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, 
# UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # 
LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE 
SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH 
CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011E: 0xD0, # LATIN CAPITAL LETTER G WITH BREVE - 0x011F: 0xF0, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0xDD, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xFD, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9C, # LATIN SMALL LIGATURE OE - 0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0xFE, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 
0x8A, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON - 0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02DC: 0x98, # SMALL TILDE - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # 
SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 
0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS 
SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN 
CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011E: 0xD0, # LATIN CAPITAL LETTER G WITH BREVE + 0x011F: 0xF0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0xDD, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0xFD, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x9C, # LATIN SMALL LIGATURE OE + 0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0xFE, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH 
CARON + 0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02DC: 0x98, # SMALL TILDE + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } - Modified: python/branches/ssize_t/Lib/encodings/cp1255.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1255.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1255.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,499 +32,498 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE 
CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9C -> UNDEFINED - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\u20aa' # 0xA4 -> NEW SHEQEL SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> 
DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xd7' # 0xAA -> MULTIPLICATION SIGN - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xf7' # 0xBA -> DIVISION SIGN - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\u05b0' # 0xC0 -> HEBREW POINT SHEVA - u'\u05b1' # 0xC1 -> HEBREW POINT HATAF SEGOL - u'\u05b2' # 0xC2 -> HEBREW POINT HATAF PATAH - u'\u05b3' # 0xC3 -> HEBREW POINT HATAF QAMATS - u'\u05b4' # 0xC4 -> HEBREW POINT HIRIQ - u'\u05b5' # 0xC5 -> HEBREW POINT TSERE - u'\u05b6' # 0xC6 -> HEBREW POINT SEGOL - u'\u05b7' # 0xC7 -> HEBREW POINT PATAH - u'\u05b8' # 0xC8 -> HEBREW POINT QAMATS - u'\u05b9' # 0xC9 -> HEBREW POINT HOLAM - u'\ufffe' # 0xCA -> UNDEFINED - u'\u05bb' # 0xCB -> HEBREW POINT QUBUTS - u'\u05bc' # 0xCC -> HEBREW POINT DAGESH OR MAPIQ - u'\u05bd' # 0xCD -> HEBREW POINT METEG - u'\u05be' # 0xCE -> HEBREW PUNCTUATION MAQAF - u'\u05bf' # 0xCF -> HEBREW POINT RAFE - u'\u05c0' # 0xD0 -> HEBREW PUNCTUATION PASEQ - u'\u05c1' # 0xD1 -> HEBREW POINT SHIN DOT - u'\u05c2' # 0xD2 -> HEBREW POINT SIN DOT - u'\u05c3' # 0xD3 -> HEBREW PUNCTUATION SOF PASUQ - u'\u05f0' # 0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV - u'\u05f1' # 0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD - u'\u05f2' # 0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD - u'\u05f3' # 0xD7 -> HEBREW PUNCTUATION GERESH - u'\u05f4' # 0xD8 -> 
HEBREW PUNCTUATION GERSHAYIM - u'\ufffe' # 0xD9 -> UNDEFINED - u'\ufffe' # 0xDA -> UNDEFINED - u'\ufffe' # 0xDB -> UNDEFINED - u'\ufffe' # 0xDC -> UNDEFINED - u'\ufffe' # 0xDD -> UNDEFINED - u'\ufffe' # 0xDE -> UNDEFINED - u'\ufffe' # 0xDF -> UNDEFINED - u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF - u'\u05d1' # 0xE1 -> HEBREW LETTER BET - u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL - u'\u05d3' # 0xE3 -> HEBREW LETTER DALET - u'\u05d4' # 0xE4 -> HEBREW LETTER HE - u'\u05d5' # 0xE5 -> HEBREW LETTER VAV - u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0xE7 -> HEBREW LETTER HET - u'\u05d8' # 0xE8 -> HEBREW LETTER TET - u'\u05d9' # 0xE9 -> HEBREW LETTER YOD - u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF - u'\u05db' # 0xEB -> HEBREW LETTER KAF - u'\u05dc' # 0xEC -> HEBREW LETTER LAMED - u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM - u'\u05de' # 0xEE -> HEBREW LETTER MEM - u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0xF0 -> HEBREW LETTER NUN - u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN - u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0xF4 -> HEBREW LETTER PE - u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI - u'\u05e7' # 0xF7 -> HEBREW LETTER QOF - u'\u05e8' # 0xF8 -> HEBREW LETTER RESH - u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN - u'\u05ea' # 0xFA -> HEBREW LETTER TAV - u'\ufffe' # 0xFB -> UNDEFINED - u'\ufffe' # 0xFC -> UNDEFINED - u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK - u'\ufffe' # 0xFF -> UNDEFINED + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> 
CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\u02dc' # 0x98 -> SMALL TILDE + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x9C -> UNDEFINED + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\u20aa' # 0xA4 -> NEW SHEQEL SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> 
DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xd7' # 0xAA -> MULTIPLICATION SIGN + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xf7' # 0xBA -> DIVISION SIGN + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\u05b0' # 0xC0 -> HEBREW POINT SHEVA + u'\u05b1' # 0xC1 -> HEBREW POINT HATAF SEGOL + u'\u05b2' # 0xC2 -> HEBREW POINT HATAF PATAH + u'\u05b3' # 0xC3 -> HEBREW POINT HATAF QAMATS + u'\u05b4' # 0xC4 -> HEBREW POINT HIRIQ + u'\u05b5' # 0xC5 -> HEBREW POINT TSERE + u'\u05b6' # 0xC6 -> HEBREW POINT SEGOL + u'\u05b7' # 0xC7 -> HEBREW POINT PATAH + u'\u05b8' # 0xC8 -> HEBREW POINT QAMATS + u'\u05b9' # 0xC9 -> HEBREW POINT HOLAM + u'\ufffe' # 0xCA -> UNDEFINED + u'\u05bb' # 0xCB -> HEBREW POINT QUBUTS + u'\u05bc' # 0xCC -> HEBREW POINT DAGESH OR MAPIQ + u'\u05bd' # 0xCD -> HEBREW POINT METEG + u'\u05be' # 0xCE -> HEBREW PUNCTUATION MAQAF + u'\u05bf' # 0xCF -> HEBREW POINT RAFE + u'\u05c0' # 0xD0 -> HEBREW PUNCTUATION PASEQ + u'\u05c1' # 0xD1 -> HEBREW POINT SHIN DOT + u'\u05c2' # 0xD2 -> HEBREW POINT SIN DOT + u'\u05c3' # 0xD3 -> HEBREW PUNCTUATION SOF PASUQ + u'\u05f0' # 0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV + u'\u05f1' # 0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD + u'\u05f2' # 0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD + u'\u05f3' # 0xD7 -> HEBREW PUNCTUATION GERESH + u'\u05f4' # 0xD8 -> 
HEBREW PUNCTUATION GERSHAYIM + u'\ufffe' # 0xD9 -> UNDEFINED + u'\ufffe' # 0xDA -> UNDEFINED + u'\ufffe' # 0xDB -> UNDEFINED + u'\ufffe' # 0xDC -> UNDEFINED + u'\ufffe' # 0xDD -> UNDEFINED + u'\ufffe' # 0xDE -> UNDEFINED + u'\ufffe' # 0xDF -> UNDEFINED + u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF + u'\u05d1' # 0xE1 -> HEBREW LETTER BET + u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL + u'\u05d3' # 0xE3 -> HEBREW LETTER DALET + u'\u05d4' # 0xE4 -> HEBREW LETTER HE + u'\u05d5' # 0xE5 -> HEBREW LETTER VAV + u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0xE7 -> HEBREW LETTER HET + u'\u05d8' # 0xE8 -> HEBREW LETTER TET + u'\u05d9' # 0xE9 -> HEBREW LETTER YOD + u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF + u'\u05db' # 0xEB -> HEBREW LETTER KAF + u'\u05dc' # 0xEC -> HEBREW LETTER LAMED + u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM + u'\u05de' # 0xEE -> HEBREW LETTER MEM + u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0xF0 -> HEBREW LETTER NUN + u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN + u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0xF4 -> HEBREW LETTER PE + u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI + u'\u05e7' # 0xF7 -> HEBREW LETTER QOF + u'\u05e8' # 0xF8 -> HEBREW LETTER RESH + u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN + u'\u05ea' # 0xFA -> HEBREW LETTER TAV + u'\ufffe' # 0xFB -> UNDEFINED + u'\ufffe' # 0xFC -> UNDEFINED + u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK + u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK + u'\ufffe' # 0xFF -> UNDEFINED ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 
0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 
0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT 
CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00D7: 0xAA, # MULTIPLICATION SIGN - 0x00F7: 0xBA, # DIVISION SIGN - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02DC: 0x98, # SMALL TILDE - 0x05B0: 0xC0, # HEBREW POINT SHEVA - 0x05B1: 0xC1, # HEBREW POINT HATAF SEGOL - 0x05B2: 0xC2, # HEBREW POINT HATAF PATAH - 0x05B3: 0xC3, # HEBREW POINT HATAF QAMATS - 0x05B4: 0xC4, # HEBREW POINT HIRIQ - 0x05B5: 0xC5, # HEBREW POINT TSERE - 0x05B6: 0xC6, # HEBREW POINT SEGOL - 0x05B7: 0xC7, # HEBREW POINT PATAH - 0x05B8: 0xC8, # HEBREW POINT QAMATS - 0x05B9: 0xC9, # HEBREW POINT HOLAM - 0x05BB: 0xCB, # HEBREW POINT QUBUTS - 0x05BC: 0xCC, # HEBREW POINT DAGESH OR MAPIQ - 0x05BD: 0xCD, # HEBREW POINT METEG - 0x05BE: 0xCE, # HEBREW PUNCTUATION MAQAF - 0x05BF: 0xCF, # HEBREW POINT RAFE - 0x05C0: 0xD0, # HEBREW PUNCTUATION PASEQ - 0x05C1: 0xD1, # HEBREW POINT SHIN DOT - 0x05C2: 0xD2, # HEBREW POINT SIN DOT - 0x05C3: 
0xD3, # HEBREW PUNCTUATION SOF PASUQ - 0x05D0: 0xE0, # HEBREW LETTER ALEF - 0x05D1: 0xE1, # HEBREW LETTER BET - 0x05D2: 0xE2, # HEBREW LETTER GIMEL - 0x05D3: 0xE3, # HEBREW LETTER DALET - 0x05D4: 0xE4, # HEBREW LETTER HE - 0x05D5: 0xE5, # HEBREW LETTER VAV - 0x05D6: 0xE6, # HEBREW LETTER ZAYIN - 0x05D7: 0xE7, # HEBREW LETTER HET - 0x05D8: 0xE8, # HEBREW LETTER TET - 0x05D9: 0xE9, # HEBREW LETTER YOD - 0x05DA: 0xEA, # HEBREW LETTER FINAL KAF - 0x05DB: 0xEB, # HEBREW LETTER KAF - 0x05DC: 0xEC, # HEBREW LETTER LAMED - 0x05DD: 0xED, # HEBREW LETTER FINAL MEM - 0x05DE: 0xEE, # HEBREW LETTER MEM - 0x05DF: 0xEF, # HEBREW LETTER FINAL NUN - 0x05E0: 0xF0, # HEBREW LETTER NUN - 0x05E1: 0xF1, # HEBREW LETTER SAMEKH - 0x05E2: 0xF2, # HEBREW LETTER AYIN - 0x05E3: 0xF3, # HEBREW LETTER FINAL PE - 0x05E4: 0xF4, # HEBREW LETTER PE - 0x05E5: 0xF5, # HEBREW LETTER FINAL TSADI - 0x05E6: 0xF6, # HEBREW LETTER TSADI - 0x05E7: 0xF7, # HEBREW LETTER QOF - 0x05E8: 0xF8, # HEBREW LETTER RESH - 0x05E9: 0xF9, # HEBREW LETTER SHIN - 0x05EA: 0xFA, # HEBREW LETTER TAV - 0x05F0: 0xD4, # HEBREW LIGATURE YIDDISH DOUBLE VAV - 0x05F1: 0xD5, # HEBREW LIGATURE YIDDISH VAV YOD - 0x05F2: 0xD6, # HEBREW LIGATURE YIDDISH DOUBLE YOD - 0x05F3: 0xD7, # HEBREW PUNCTUATION GERESH - 0x05F4: 0xD8, # HEBREW PUNCTUATION GERSHAYIM - 0x200E: 0xFD, # LEFT-TO-RIGHT MARK - 0x200F: 0xFE, # RIGHT-TO-LEFT MARK - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AA: 0xA4, # NEW 
SHEQEL SIGN - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 
0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 
0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00D7: 0xAA, # MULTIPLICATION SIGN + 0x00F7: 0xBA, # DIVISION SIGN + 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02DC: 0x98, # SMALL TILDE + 0x05B0: 0xC0, # HEBREW POINT SHEVA + 0x05B1: 0xC1, # HEBREW POINT HATAF SEGOL + 0x05B2: 0xC2, # HEBREW POINT HATAF PATAH + 0x05B3: 0xC3, # HEBREW POINT HATAF QAMATS + 0x05B4: 0xC4, # HEBREW POINT HIRIQ + 0x05B5: 0xC5, # HEBREW POINT TSERE + 
0x05B6: 0xC6, # HEBREW POINT SEGOL + 0x05B7: 0xC7, # HEBREW POINT PATAH + 0x05B8: 0xC8, # HEBREW POINT QAMATS + 0x05B9: 0xC9, # HEBREW POINT HOLAM + 0x05BB: 0xCB, # HEBREW POINT QUBUTS + 0x05BC: 0xCC, # HEBREW POINT DAGESH OR MAPIQ + 0x05BD: 0xCD, # HEBREW POINT METEG + 0x05BE: 0xCE, # HEBREW PUNCTUATION MAQAF + 0x05BF: 0xCF, # HEBREW POINT RAFE + 0x05C0: 0xD0, # HEBREW PUNCTUATION PASEQ + 0x05C1: 0xD1, # HEBREW POINT SHIN DOT + 0x05C2: 0xD2, # HEBREW POINT SIN DOT + 0x05C3: 0xD3, # HEBREW PUNCTUATION SOF PASUQ + 0x05D0: 0xE0, # HEBREW LETTER ALEF + 0x05D1: 0xE1, # HEBREW LETTER BET + 0x05D2: 0xE2, # HEBREW LETTER GIMEL + 0x05D3: 0xE3, # HEBREW LETTER DALET + 0x05D4: 0xE4, # HEBREW LETTER HE + 0x05D5: 0xE5, # HEBREW LETTER VAV + 0x05D6: 0xE6, # HEBREW LETTER ZAYIN + 0x05D7: 0xE7, # HEBREW LETTER HET + 0x05D8: 0xE8, # HEBREW LETTER TET + 0x05D9: 0xE9, # HEBREW LETTER YOD + 0x05DA: 0xEA, # HEBREW LETTER FINAL KAF + 0x05DB: 0xEB, # HEBREW LETTER KAF + 0x05DC: 0xEC, # HEBREW LETTER LAMED + 0x05DD: 0xED, # HEBREW LETTER FINAL MEM + 0x05DE: 0xEE, # HEBREW LETTER MEM + 0x05DF: 0xEF, # HEBREW LETTER FINAL NUN + 0x05E0: 0xF0, # HEBREW LETTER NUN + 0x05E1: 0xF1, # HEBREW LETTER SAMEKH + 0x05E2: 0xF2, # HEBREW LETTER AYIN + 0x05E3: 0xF3, # HEBREW LETTER FINAL PE + 0x05E4: 0xF4, # HEBREW LETTER PE + 0x05E5: 0xF5, # HEBREW LETTER FINAL TSADI + 0x05E6: 0xF6, # HEBREW LETTER TSADI + 0x05E7: 0xF7, # HEBREW LETTER QOF + 0x05E8: 0xF8, # HEBREW LETTER RESH + 0x05E9: 0xF9, # HEBREW LETTER SHIN + 0x05EA: 0xFA, # HEBREW LETTER TAV + 0x05F0: 0xD4, # HEBREW LIGATURE YIDDISH DOUBLE VAV + 0x05F1: 0xD5, # HEBREW LIGATURE YIDDISH VAV YOD + 0x05F2: 0xD6, # HEBREW LIGATURE YIDDISH DOUBLE YOD + 0x05F3: 0xD7, # HEBREW PUNCTUATION GERESH + 0x05F4: 0xD8, # HEBREW PUNCTUATION GERSHAYIM + 0x200E: 0xFD, # LEFT-TO-RIGHT MARK + 0x200F: 0xFE, # RIGHT-TO-LEFT MARK + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK 
+ 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AA: 0xA4, # NEW SHEQEL SIGN + 0x20AC: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } - Modified: python/branches/ssize_t/Lib/encodings/cp1256.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1256.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1256.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - 
u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\u067e' # 0x81 -> ARABIC LETTER PEH - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0679' # 0x8A -> ARABIC LETTER TTEH - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\u0686' # 0x8D -> ARABIC LETTER TCHEH - u'\u0698' # 0x8E -> ARABIC LETTER JEH - u'\u0688' # 0x8F -> ARABIC LETTER DDAL - u'\u06af' # 0x90 -> ARABIC LETTER GAF - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u06a9' # 0x98 -> ARABIC LETTER KEHEH - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0691' # 0x9A -> ARABIC LETTER RREH - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\u200c' # 0x9D -> ZERO WIDTH NON-JOINER - u'\u200d' # 0x9E -> ZERO WIDTH JOINER - u'\u06ba' # 0x9F -> ARABIC LETTER NOON GHUNNA - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u060c' # 0xA1 -> ARABIC COMMA - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - 
u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u06be' # 0xAA -> ARABIC LETTER HEH DOACHASHMEE - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\u061b' # 0xBA -> ARABIC SEMICOLON - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\u061f' # 0xBF -> ARABIC QUESTION MARK - u'\u06c1' # 0xC0 -> ARABIC LETTER HEH GOAL - u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xC7 -> ARABIC LETTER ALEF - u'\u0628' # 0xC8 -> ARABIC LETTER BEH - u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xCA -> ARABIC LETTER TEH - u'\u062b' # 0xCB -> ARABIC LETTER THEH - u'\u062c' # 0xCC -> ARABIC LETTER JEEM - u'\u062d' # 0xCD -> ARABIC LETTER HAH - u'\u062e' # 0xCE -> ARABIC LETTER KHAH - u'\u062f' # 0xCF -> ARABIC LETTER DAL - u'\u0630' # 0xD0 -> ARABIC LETTER THAL - u'\u0631' # 0xD1 -> ARABIC LETTER REH - u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xD3 -> ARABIC LETTER SEEN - u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xD5 -> ARABIC 
LETTER SAD - u'\u0636' # 0xD6 -> ARABIC LETTER DAD - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0637' # 0xD8 -> ARABIC LETTER TAH - u'\u0638' # 0xD9 -> ARABIC LETTER ZAH - u'\u0639' # 0xDA -> ARABIC LETTER AIN - u'\u063a' # 0xDB -> ARABIC LETTER GHAIN - u'\u0640' # 0xDC -> ARABIC TATWEEL - u'\u0641' # 0xDD -> ARABIC LETTER FEH - u'\u0642' # 0xDE -> ARABIC LETTER QAF - u'\u0643' # 0xDF -> ARABIC LETTER KAF - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\u0644' # 0xE1 -> ARABIC LETTER LAM - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0645' # 0xE3 -> ARABIC LETTER MEEM - u'\u0646' # 0xE4 -> ARABIC LETTER NOON - u'\u0647' # 0xE5 -> ARABIC LETTER HEH - u'\u0648' # 0xE6 -> ARABIC LETTER WAW - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0649' # 0xEC -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xED -> ARABIC LETTER YEH - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u064b' # 0xF0 -> ARABIC FATHATAN - u'\u064c' # 0xF1 -> ARABIC DAMMATAN - u'\u064d' # 0xF2 -> ARABIC KASRATAN - u'\u064e' # 0xF3 -> ARABIC FATHA - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u064f' # 0xF5 -> ARABIC DAMMA - u'\u0650' # 0xF6 -> ARABIC KASRA - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0651' # 0xF8 -> ARABIC SHADDA - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\u0652' # 0xFA -> ARABIC SUKUN - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK - u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + 
u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\u067e' # 0x81 -> ARABIC LETTER PEH + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\u0679' # 0x8A -> ARABIC LETTER TTEH + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\u0686' # 0x8D -> ARABIC LETTER TCHEH + u'\u0698' # 0x8E -> ARABIC LETTER JEH + u'\u0688' # 0x8F -> ARABIC LETTER DDAL + u'\u06af' # 0x90 -> ARABIC LETTER GAF + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\u06a9' # 0x98 -> ARABIC LETTER KEHEH + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\u0691' # 0x9A -> ARABIC LETTER RREH + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\u200c' # 0x9D -> ZERO WIDTH NON-JOINER + u'\u200d' # 0x9E -> ZERO WIDTH JOINER + u'\u06ba' # 0x9F -> ARABIC LETTER NOON GHUNNA + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u060c' # 0xA1 -> ARABIC COMMA + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + 
u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u06be' # 0xAA -> ARABIC LETTER HEH DOACHASHMEE + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\u061b' # 0xBA -> ARABIC SEMICOLON + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\u061f' # 0xBF -> ARABIC QUESTION MARK + u'\u06c1' # 0xC0 -> ARABIC LETTER HEH GOAL + u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA + u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0xC7 -> ARABIC LETTER ALEF + u'\u0628' # 0xC8 -> ARABIC LETTER BEH + u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0xCA -> ARABIC LETTER TEH + u'\u062b' # 0xCB -> ARABIC LETTER THEH + u'\u062c' # 0xCC -> ARABIC LETTER JEEM + u'\u062d' # 0xCD -> ARABIC LETTER HAH + u'\u062e' # 0xCE -> ARABIC LETTER KHAH + u'\u062f' # 0xCF -> ARABIC LETTER DAL + u'\u0630' # 0xD0 -> ARABIC LETTER THAL + u'\u0631' # 0xD1 -> ARABIC LETTER REH + u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN + u'\u0633' # 0xD3 -> ARABIC LETTER SEEN + u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN + u'\u0635' # 0xD5 -> ARABIC 
LETTER SAD + u'\u0636' # 0xD6 -> ARABIC LETTER DAD + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0637' # 0xD8 -> ARABIC LETTER TAH + u'\u0638' # 0xD9 -> ARABIC LETTER ZAH + u'\u0639' # 0xDA -> ARABIC LETTER AIN + u'\u063a' # 0xDB -> ARABIC LETTER GHAIN + u'\u0640' # 0xDC -> ARABIC TATWEEL + u'\u0641' # 0xDD -> ARABIC LETTER FEH + u'\u0642' # 0xDE -> ARABIC LETTER QAF + u'\u0643' # 0xDF -> ARABIC LETTER KAF + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\u0644' # 0xE1 -> ARABIC LETTER LAM + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0645' # 0xE3 -> ARABIC LETTER MEEM + u'\u0646' # 0xE4 -> ARABIC LETTER NOON + u'\u0647' # 0xE5 -> ARABIC LETTER HEH + u'\u0648' # 0xE6 -> ARABIC LETTER WAW + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0649' # 0xEC -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0xED -> ARABIC LETTER YEH + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u064b' # 0xF0 -> ARABIC FATHATAN + u'\u064c' # 0xF1 -> ARABIC DAMMATAN + u'\u064d' # 0xF2 -> ARABIC KASRATAN + u'\u064e' # 0xF3 -> ARABIC FATHA + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u064f' # 0xF5 -> ARABIC DAMMA + u'\u0650' # 0xF6 -> ARABIC KASRA + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0651' # 0xF8 -> ARABIC SHADDA + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\u0652' # 0xFA -> ARABIC SUKUN + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK + u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK + u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 
0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL 
LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # 
LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FB: 0xFB, # 
LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9C, # LATIN SMALL LIGATURE OE - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x060C: 0xA1, # ARABIC COMMA - 0x061B: 0xBA, # ARABIC SEMICOLON - 0x061F: 0xBF, # ARABIC QUESTION MARK - 0x0621: 0xC1, # ARABIC LETTER HAMZA - 0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0xC7, # ARABIC LETTER ALEF - 0x0628: 0xC8, # ARABIC LETTER BEH - 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA - 0x062A: 0xCA, # ARABIC LETTER TEH - 0x062B: 0xCB, # ARABIC LETTER THEH - 0x062C: 0xCC, # ARABIC LETTER JEEM - 0x062D: 0xCD, # ARABIC LETTER HAH - 0x062E: 0xCE, # ARABIC LETTER KHAH - 0x062F: 0xCF, # ARABIC LETTER DAL - 0x0630: 0xD0, # ARABIC LETTER THAL - 0x0631: 0xD1, # ARABIC LETTER REH - 0x0632: 0xD2, # ARABIC LETTER ZAIN - 0x0633: 0xD3, # ARABIC LETTER SEEN - 0x0634: 0xD4, # ARABIC LETTER SHEEN - 0x0635: 0xD5, # ARABIC LETTER SAD - 0x0636: 0xD6, # ARABIC LETTER DAD - 0x0637: 0xD8, # ARABIC LETTER TAH - 0x0638: 0xD9, # ARABIC LETTER ZAH - 0x0639: 0xDA, # ARABIC LETTER AIN - 0x063A: 0xDB, # ARABIC LETTER GHAIN - 0x0640: 0xDC, # ARABIC TATWEEL - 0x0641: 0xDD, # ARABIC LETTER FEH - 0x0642: 0xDE, # ARABIC LETTER QAF - 0x0643: 0xDF, # ARABIC LETTER KAF - 0x0644: 0xE1, # ARABIC LETTER LAM - 0x0645: 0xE3, # ARABIC LETTER MEEM - 0x0646: 0xE4, # ARABIC LETTER NOON - 0x0647: 0xE5, # ARABIC LETTER HEH - 0x0648: 0xE6, # ARABIC LETTER WAW - 0x0649: 0xEC, # ARABIC LETTER ALEF MAKSURA - 0x064A: 0xED, # ARABIC LETTER YEH - 0x064B: 0xF0, # ARABIC FATHATAN - 0x064C: 0xF1, # ARABIC DAMMATAN - 0x064D: 0xF2, # ARABIC KASRATAN - 0x064E: 0xF3, # ARABIC FATHA - 0x064F: 0xF5, # ARABIC DAMMA - 
0x0650: 0xF6, # ARABIC KASRA - 0x0651: 0xF8, # ARABIC SHADDA - 0x0652: 0xFA, # ARABIC SUKUN - 0x0679: 0x8A, # ARABIC LETTER TTEH - 0x067E: 0x81, # ARABIC LETTER PEH - 0x0686: 0x8D, # ARABIC LETTER TCHEH - 0x0688: 0x8F, # ARABIC LETTER DDAL - 0x0691: 0x9A, # ARABIC LETTER RREH - 0x0698: 0x8E, # ARABIC LETTER JEH - 0x06A9: 0x98, # ARABIC LETTER KEHEH - 0x06AF: 0x90, # ARABIC LETTER GAF - 0x06BA: 0x9F, # ARABIC LETTER NOON GHUNNA - 0x06BE: 0xAA, # ARABIC LETTER HEH DOACHASHMEE - 0x06C1: 0xC0, # ARABIC LETTER HEH GOAL - 0x06D2: 0xFF, # ARABIC LETTER YEH BARREE - 0x200C: 0x9D, # ZERO WIDTH NON-JOINER - 0x200D: 0x9E, # ZERO WIDTH JOINER - 0x200E: 0xFD, # LEFT-TO-RIGHT MARK - 0x200F: 0xFE, # RIGHT-TO-LEFT MARK - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE 
CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN 
CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 
0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x9C, # LATIN SMALL LIGATURE OE + 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x060C: 0xA1, # ARABIC COMMA + 0x061B: 0xBA, # ARABIC SEMICOLON + 0x061F: 0xBF, # ARABIC QUESTION MARK + 0x0621: 0xC1, # ARABIC LETTER HAMZA + 0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA 
ABOVE + 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0xC7, # ARABIC LETTER ALEF + 0x0628: 0xC8, # ARABIC LETTER BEH + 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA + 0x062A: 0xCA, # ARABIC LETTER TEH + 0x062B: 0xCB, # ARABIC LETTER THEH + 0x062C: 0xCC, # ARABIC LETTER JEEM + 0x062D: 0xCD, # ARABIC LETTER HAH + 0x062E: 0xCE, # ARABIC LETTER KHAH + 0x062F: 0xCF, # ARABIC LETTER DAL + 0x0630: 0xD0, # ARABIC LETTER THAL + 0x0631: 0xD1, # ARABIC LETTER REH + 0x0632: 0xD2, # ARABIC LETTER ZAIN + 0x0633: 0xD3, # ARABIC LETTER SEEN + 0x0634: 0xD4, # ARABIC LETTER SHEEN + 0x0635: 0xD5, # ARABIC LETTER SAD + 0x0636: 0xD6, # ARABIC LETTER DAD + 0x0637: 0xD8, # ARABIC LETTER TAH + 0x0638: 0xD9, # ARABIC LETTER ZAH + 0x0639: 0xDA, # ARABIC LETTER AIN + 0x063A: 0xDB, # ARABIC LETTER GHAIN + 0x0640: 0xDC, # ARABIC TATWEEL + 0x0641: 0xDD, # ARABIC LETTER FEH + 0x0642: 0xDE, # ARABIC LETTER QAF + 0x0643: 0xDF, # ARABIC LETTER KAF + 0x0644: 0xE1, # ARABIC LETTER LAM + 0x0645: 0xE3, # ARABIC LETTER MEEM + 0x0646: 0xE4, # ARABIC LETTER NOON + 0x0647: 0xE5, # ARABIC LETTER HEH + 0x0648: 0xE6, # ARABIC LETTER WAW + 0x0649: 0xEC, # ARABIC LETTER ALEF MAKSURA + 0x064A: 0xED, # ARABIC LETTER YEH + 0x064B: 0xF0, # ARABIC FATHATAN + 0x064C: 0xF1, # ARABIC DAMMATAN + 0x064D: 0xF2, # ARABIC KASRATAN + 0x064E: 0xF3, # ARABIC FATHA + 0x064F: 0xF5, # ARABIC DAMMA + 0x0650: 0xF6, # ARABIC KASRA + 0x0651: 0xF8, # ARABIC SHADDA + 0x0652: 0xFA, # ARABIC SUKUN + 0x0679: 0x8A, # ARABIC LETTER TTEH + 0x067E: 0x81, # ARABIC LETTER PEH + 0x0686: 0x8D, # ARABIC LETTER TCHEH + 0x0688: 0x8F, # ARABIC LETTER DDAL + 0x0691: 0x9A, # ARABIC LETTER RREH + 0x0698: 0x8E, # ARABIC LETTER JEH + 0x06A9: 0x98, # ARABIC LETTER KEHEH + 0x06AF: 0x90, # ARABIC LETTER GAF + 0x06BA: 0x9F, # ARABIC LETTER NOON GHUNNA + 0x06BE: 0xAA, # ARABIC LETTER HEH DOACHASHMEE + 0x06C1: 0xC0, # ARABIC LETTER HEH GOAL + 
0x06D2: 0xFF, # ARABIC LETTER YEH BARREE + 0x200C: 0x9D, # ZERO WIDTH NON-JOINER + 0x200D: 0x9E, # ZERO WIDTH JOINER + 0x200E: 0xFD, # LEFT-TO-RIGHT MARK + 0x200F: 0xFE, # RIGHT-TO-LEFT MARK + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } - Modified: python/branches/ssize_t/Lib/encodings/cp1257.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1257.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1257.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,510 +32,509 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - 
u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\ufffe' # 0x83 -> UNDEFINED - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\ufffe' # 0x88 -> UNDEFINED - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\xa8' # 0x8D -> DIAERESIS - u'\u02c7' # 0x8E -> CARON - u'\xb8' # 0x8F -> CEDILLA - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9C -> UNDEFINED - u'\xaf' # 0x9D -> MACRON - u'\u02db' # 0x9E -> OGONEK - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\ufffe' # 0xA1 -> UNDEFINED - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\ufffe' # 0xA5 -> UNDEFINED - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0156' # 0xAA 
-> LATIN CAPITAL LETTER R WITH CEDILLA - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xe6' # 0xBF -> LATIN SMALL LETTER AE - u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON - u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA - 
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK - u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK - u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON - u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK - u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE - u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA - u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON - u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION 
SIGN - u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK - u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE - u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE - u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON - u'\u02d9' # 0xFF -> DOT ABOVE + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\ufffe' # 0x83 -> UNDEFINED + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\ufffe' # 0x88 -> UNDEFINED + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\xa8' # 0x8D -> DIAERESIS + u'\u02c7' # 0x8E -> CARON + u'\xb8' # 0x8F -> CEDILLA + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\ufffe' # 0x98 -> UNDEFINED + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' 
# 0x9C -> UNDEFINED + u'\xaf' # 0x9D -> MACRON + u'\u02db' # 0x9E -> OGONEK + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\ufffe' # 0xA1 -> UNDEFINED + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\ufffe' # 0xA5 -> UNDEFINED + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xe6' # 0xBF -> LATIN SMALL LETTER AE + u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH 
ACUTE + u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON + u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON + u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE + u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA + u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH 
MACRON + u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK + u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE + u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE + u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON + u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 
0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # 
LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0x8D, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0x9D, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # 
MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0x8F, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xAF, # LATIN CAPITAL LETTER AE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xA8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xBF, # LATIN SMALL LETTER AE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xB8, # LATIN SMALL LETTER O WITH STROKE - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0xC2, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xE2, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xC0, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xE0, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xC3, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE3, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x0112: 0xC7, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xE7, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xCB, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xEB, # LATIN SMALL LETTER E WITH DOT 
ABOVE - 0x0118: 0xC6, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xE6, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xCC, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xEC, # LATIN SMALL LETTER G WITH CEDILLA - 0x012A: 0xCE, # LATIN CAPITAL LETTER I WITH MACRON - 0x012B: 0xEE, # LATIN SMALL LETTER I WITH MACRON - 0x012E: 0xC1, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012F: 0xE1, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xCD, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xED, # LATIN SMALL LETTER K WITH CEDILLA - 0x013B: 0xCF, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013C: 0xEF, # LATIN SMALL LETTER L WITH CEDILLA - 0x0141: 0xD9, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xF9, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0xD2, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xF2, # LATIN SMALL LETTER N WITH CEDILLA - 0x014C: 0xD4, # LATIN CAPITAL LETTER O WITH MACRON - 0x014D: 0xF4, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0xAA, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xBA, # LATIN SMALL LETTER R WITH CEDILLA - 0x015A: 0xDA, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0xFA, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xD0, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xF0, # LATIN SMALL LETTER S WITH CARON - 0x016A: 0xDB, # LATIN CAPITAL LETTER U WITH MACRON - 0x016B: 0xFB, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0xD8, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xF8, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0xCA, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017A: 0xEA, # LATIN SMALL LETTER Z WITH ACUTE - 0x017B: 0xDD, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0xDE, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xFE, # LATIN SMALL LETTER Z WITH CARON - 0x02C7: 0x8E, # CARON - 0x02D9: 0xFF, # DOT ABOVE - 0x02DB: 0x9E, # OGONEK - 0x2013: 0x96, # EN DASH - 0x2014: 
0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # 
ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B 
+ 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0x8D, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0x9D, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0x8F, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR 
FRACTION THREE QUARTERS + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xAF, # LATIN CAPITAL LETTER AE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xA8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xBF, # LATIN SMALL LETTER AE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xB8, # LATIN SMALL LETTER O WITH STROKE + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0xC2, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0xE2, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0xC0, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xE0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0xC3, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0xE3, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0xC7, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0xE7, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0xCB, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0xEB, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0xC6, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xE6, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0xCC, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0xEC, # LATIN SMALL LETTER G WITH CEDILLA + 0x012A: 0xCE, # LATIN CAPITAL LETTER I WITH MACRON + 0x012B: 0xEE, # LATIN SMALL LETTER 
I WITH MACRON + 0x012E: 0xC1, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012F: 0xE1, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0xCD, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0xED, # LATIN SMALL LETTER K WITH CEDILLA + 0x013B: 0xCF, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013C: 0xEF, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0xD9, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xF9, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0xD2, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0xF2, # LATIN SMALL LETTER N WITH CEDILLA + 0x014C: 0xD4, # LATIN CAPITAL LETTER O WITH MACRON + 0x014D: 0xF4, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0xAA, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0xBA, # LATIN SMALL LETTER R WITH CEDILLA + 0x015A: 0xDA, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0xFA, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0xD0, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xF0, # LATIN SMALL LETTER S WITH CARON + 0x016A: 0xDB, # LATIN CAPITAL LETTER U WITH MACRON + 0x016B: 0xFB, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0xD8, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xF8, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0xCA, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0xEA, # LATIN SMALL LETTER Z WITH ACUTE + 0x017B: 0xDD, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0xDE, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xFE, # LATIN SMALL LETTER Z WITH CARON + 0x02C7: 0x8E, # CARON + 0x02D9: 0xFF, # DOT ABOVE + 0x02DB: 0x9E, # OGONEK + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # 
DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } - Modified: python/branches/ssize_t/Lib/encodings/cp1258.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp1258.py (original) +++ python/branches/ssize_t/Lib/encodings/cp1258.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,513 +32,512 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' 
# 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - 
u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - 
u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 
u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0300' # 0xCC -> COMBINING GRAVE ACCENT - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\u0309' # 0xD2 -> COMBINING HOOK ABOVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u01a0' # 0xD5 -> LATIN CAPITAL LETTER O WITH HORN - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u01af' # 0xDD -> LATIN CAPITAL LETTER U WITH HORN - u'\u0303' # 0xDE -> COMBINING TILDE - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> 
LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0301' # 0xEC -> COMBINING ACUTE ACCENT - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\u0323' # 0xF2 -> COMBINING DOT BELOW - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u01a1' # 0xF5 -> LATIN SMALL LETTER O WITH HORN - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u01b0' # 0xFD -> LATIN SMALL LETTER U WITH HORN - u'\u20ab' # 0xFE -> DONG SIGN - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> 
SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\u02dc' # 0x98 -> SMALL TILDE + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + 
u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0300' # 0xCC -> COMBINING GRAVE ACCENT + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\u0309' # 0xD2 -> COMBINING HOOK ABOVE + 
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u01a0' # 0xD5 -> LATIN CAPITAL LETTER O WITH HORN + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u01af' # 0xDD -> LATIN CAPITAL LETTER U WITH HORN + u'\u0303' # 0xDE -> COMBINING TILDE + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0301' # 0xEC -> COMBINING ACUTE ACCENT + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\u0323' # 0xF2 -> COMBINING DOT BELOW + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u01a1' # 0xF5 -> LATIN SMALL LETTER O WITH HORN + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' 
# 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u01b0' # 0xFD -> LATIN SMALL LETTER U WITH HORN + u'\u20ab' # 0xFE -> DONG SIGN + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # 
DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 
0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, 
# LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN 
SMALL LETTER I WITH DIAERESIS - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE - 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE - 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9C, # LATIN SMALL LIGATURE OE - 0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x01A0: 0xD5, # LATIN CAPITAL LETTER O WITH HORN - 0x01A1: 0xF5, # LATIN SMALL LETTER O WITH HORN - 0x01AF: 0xDD, # LATIN CAPITAL LETTER U WITH HORN - 0x01B0: 0xFD, # LATIN SMALL LETTER U WITH HORN - 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02DC: 0x98, # SMALL TILDE - 0x0300: 0xCC, # COMBINING GRAVE ACCENT - 0x0301: 0xEC, # COMBINING ACUTE ACCENT - 0x0303: 0xDE, # COMBINING TILDE - 0x0309: 0xD2, # COMBINING HOOK ABOVE - 0x0323: 0xF2, # COMBINING DOT BELOW - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 
0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AB: 0xFE, # DONG SIGN - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # 
SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 
0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL 
LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH 
DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x9C, # LATIN SMALL LIGATURE OE + 0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK + 0x01A0: 0xD5, # LATIN CAPITAL LETTER O WITH HORN + 0x01A1: 0xF5, # LATIN SMALL LETTER O WITH HORN + 0x01AF: 0xDD, # LATIN CAPITAL LETTER U WITH HORN + 0x01B0: 0xFD, # LATIN SMALL LETTER U WITH HORN + 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02DC: 0x98, # SMALL TILDE + 0x0300: 0xCC, # COMBINING GRAVE ACCENT + 0x0301: 0xEC, # COMBINING ACUTE ACCENT + 0x0303: 0xDE, # COMBINING TILDE + 0x0309: 0xD2, # COMBINING HOOK ABOVE + 0x0323: 0xF2, # COMBINING DOT BELOW + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AB: 0xFE, # DONG SIGN + 0x20AC: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } - Modified: python/branches/ssize_t/Lib/encodings/cp424.py 
============================================================================== --- python/branches/ssize_t/Lib/encodings/cp424.py (original) +++ python/branches/ssize_t/Lib/encodings/cp424.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,484 +32,483 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> SELECT - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> REQUIRED NEW LINE - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> GRAPHIC ESCAPE - u'\x8d' # 0x09 -> SUPERSCRIPT - u'\x8e' # 0x0A -> REPEAT - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> RESTORE/ENABLE PRESENTATION - u'\x85' # 0x15 -> NEW LINE - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> PROGRAM OPERATOR COMMUNICATION - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> UNIT BACK SPACE - u'\x8f' # 0x1B -> CUSTOMER USE ONE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> DIGIT SELECT - u'\x81' # 0x21 -> START OF SIGNIFICANCE - u'\x82' # 0x22 -> FIELD SEPARATOR - u'\x83' # 0x23 -> WORD UNDERSCORE - u'\x84' # 0x24 -> BYPASS OR INHIBIT PRESENTATION - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> SET ATTRIBUTE - u'\x89' # 0x29 -> START FIELD EXTENDED - u'\x8a' # 0x2A -> SET MODE OR SWITCH - u'\x8b' # 0x2B -> CONTROL 
SEQUENCE PREFIX - u'\x8c' # 0x2C -> MODIFY FIELD ATTRIBUTE - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> - u'\x91' # 0x31 -> - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> INDEX RETURN - u'\x94' # 0x34 -> PRESENTATION POSITION - u'\x95' # 0x35 -> TRANSPARENT - u'\x96' # 0x36 -> NUMERIC BACKSPACE - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> SUBSCRIPT - u'\x99' # 0x39 -> INDENT TABULATION - u'\x9a' # 0x3A -> REVERSE FORM FEED - u'\x9b' # 0x3B -> CUSTOMER USE THREE - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\u05d0' # 0x41 -> HEBREW LETTER ALEF - u'\u05d1' # 0x42 -> HEBREW LETTER BET - u'\u05d2' # 0x43 -> HEBREW LETTER GIMEL - u'\u05d3' # 0x44 -> HEBREW LETTER DALET - u'\u05d4' # 0x45 -> HEBREW LETTER HE - u'\u05d5' # 0x46 -> HEBREW LETTER VAV - u'\u05d6' # 0x47 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0x48 -> HEBREW LETTER HET - u'\u05d8' # 0x49 -> HEBREW LETTER TET - u'\xa2' # 0x4A -> CENT SIGN - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'|' # 0x4F -> VERTICAL LINE - u'&' # 0x50 -> AMPERSAND - u'\u05d9' # 0x51 -> HEBREW LETTER YOD - u'\u05da' # 0x52 -> HEBREW LETTER FINAL KAF - u'\u05db' # 0x53 -> HEBREW LETTER KAF - u'\u05dc' # 0x54 -> HEBREW LETTER LAMED - u'\u05dd' # 0x55 -> HEBREW LETTER FINAL MEM - u'\u05de' # 0x56 -> HEBREW LETTER MEM - u'\u05df' # 0x57 -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0x58 -> HEBREW LETTER NUN - u'\u05e1' # 0x59 -> HEBREW LETTER SAMEKH - u'!' 
# 0x5A -> EXCLAMATION MARK - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'\xac' # 0x5F -> NOT SIGN - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\u05e2' # 0x62 -> HEBREW LETTER AYIN - u'\u05e3' # 0x63 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0x64 -> HEBREW LETTER PE - u'\u05e5' # 0x65 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0x66 -> HEBREW LETTER TSADI - u'\u05e7' # 0x67 -> HEBREW LETTER QOF - u'\u05e8' # 0x68 -> HEBREW LETTER RESH - u'\u05e9' # 0x69 -> HEBREW LETTER SHIN - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' # 0x6F -> QUESTION MARK - u'\ufffe' # 0x70 -> UNDEFINED - u'\u05ea' # 0x71 -> HEBREW LETTER TAV - u'\ufffe' # 0x72 -> UNDEFINED - u'\ufffe' # 0x73 -> UNDEFINED - u'\xa0' # 0x74 -> NO-BREAK SPACE - u'\ufffe' # 0x75 -> UNDEFINED - u'\ufffe' # 0x76 -> UNDEFINED - u'\ufffe' # 0x77 -> UNDEFINED - u'\u2017' # 0x78 -> DOUBLE LOW LINE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\ufffe' # 0x80 -> UNDEFINED - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 
0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\ufffe' # 0x9A -> UNDEFINED - u'\ufffe' # 0x9B -> UNDEFINED - u'\ufffe' # 0x9C -> UNDEFINED - u'\xb8' # 0x9D -> CEDILLA - u'\ufffe' # 0x9E -> UNDEFINED - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\ufffe' # 0xAA -> UNDEFINED - u'\ufffe' # 0xAB -> UNDEFINED - u'\ufffe' # 0xAC -> UNDEFINED - u'\ufffe' # 0xAD -> UNDEFINED - u'\ufffe' # 0xAE -> UNDEFINED - u'\xae' # 0xAF -> REGISTERED SIGN - u'^' # 0xB0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xBA -> LEFT SQUARE BRACKET - u']' # 0xBB -> RIGHT SQUARE BRACKET - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT 
HYPHEN - u'\ufffe' # 0xCB -> UNDEFINED - u'\ufffe' # 0xCC -> UNDEFINED - u'\ufffe' # 0xCD -> UNDEFINED - u'\ufffe' # 0xCE -> UNDEFINED - u'\ufffe' # 0xCF -> UNDEFINED - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\ufffe' # 0xDB -> UNDEFINED - u'\ufffe' # 0xDC -> UNDEFINED - u'\ufffe' # 0xDD -> UNDEFINED - u'\ufffe' # 0xDE -> UNDEFINED - u'\ufffe' # 0xDF -> UNDEFINED - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\ufffe' # 0xEB -> UNDEFINED - u'\ufffe' # 0xEC -> UNDEFINED - u'\ufffe' # 0xED -> UNDEFINED - u'\ufffe' # 0xEE -> UNDEFINED - u'\ufffe' # 0xEF -> UNDEFINED - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\ufffe' # 0xFB -> UNDEFINED - u'\ufffe' # 0xFC -> UNDEFINED - u'\ufffe' # 0xFD -> UNDEFINED - u'\ufffe' # 0xFE -> UNDEFINED - u'\x9f' # 0xFF -> EIGHT ONES + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> SELECT + u'\t' # 0x05 -> 
HORIZONTAL TABULATION + u'\x86' # 0x06 -> REQUIRED NEW LINE + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> GRAPHIC ESCAPE + u'\x8d' # 0x09 -> SUPERSCRIPT + u'\x8e' # 0x0A -> REPEAT + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> RESTORE/ENABLE PRESENTATION + u'\x85' # 0x15 -> NEW LINE + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> PROGRAM OPERATOR COMMUNICATION + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> UNIT BACK SPACE + u'\x8f' # 0x1B -> CUSTOMER USE ONE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> DIGIT SELECT + u'\x81' # 0x21 -> START OF SIGNIFICANCE + u'\x82' # 0x22 -> FIELD SEPARATOR + u'\x83' # 0x23 -> WORD UNDERSCORE + u'\x84' # 0x24 -> BYPASS OR INHIBIT PRESENTATION + u'\n' # 0x25 -> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> SET ATTRIBUTE + u'\x89' # 0x29 -> START FIELD EXTENDED + u'\x8a' # 0x2A -> SET MODE OR SWITCH + u'\x8b' # 0x2B -> CONTROL SEQUENCE PREFIX + u'\x8c' # 0x2C -> MODIFY FIELD ATTRIBUTE + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> + u'\x91' # 0x31 -> + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> INDEX RETURN + u'\x94' # 0x34 -> PRESENTATION POSITION + u'\x95' # 0x35 -> TRANSPARENT + u'\x96' # 0x36 -> NUMERIC BACKSPACE + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> SUBSCRIPT + u'\x99' # 0x39 -> INDENT TABULATION + u'\x9a' # 0x3A -> REVERSE FORM FEED + u'\x9b' # 0x3B -> CUSTOMER USE THREE + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE 
ACKNOWLEDGE + u'\x9e' # 0x3E -> + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\u05d0' # 0x41 -> HEBREW LETTER ALEF + u'\u05d1' # 0x42 -> HEBREW LETTER BET + u'\u05d2' # 0x43 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x44 -> HEBREW LETTER DALET + u'\u05d4' # 0x45 -> HEBREW LETTER HE + u'\u05d5' # 0x46 -> HEBREW LETTER VAV + u'\u05d6' # 0x47 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x48 -> HEBREW LETTER HET + u'\u05d8' # 0x49 -> HEBREW LETTER TET + u'\xa2' # 0x4A -> CENT SIGN + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'|' # 0x4F -> VERTICAL LINE + u'&' # 0x50 -> AMPERSAND + u'\u05d9' # 0x51 -> HEBREW LETTER YOD + u'\u05da' # 0x52 -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x53 -> HEBREW LETTER KAF + u'\u05dc' # 0x54 -> HEBREW LETTER LAMED + u'\u05dd' # 0x55 -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x56 -> HEBREW LETTER MEM + u'\u05df' # 0x57 -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x58 -> HEBREW LETTER NUN + u'\u05e1' # 0x59 -> HEBREW LETTER SAMEKH + u'!' # 0x5A -> EXCLAMATION MARK + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'\xac' # 0x5F -> NOT SIGN + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\u05e2' # 0x62 -> HEBREW LETTER AYIN + u'\u05e3' # 0x63 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x64 -> HEBREW LETTER PE + u'\u05e5' # 0x65 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x66 -> HEBREW LETTER TSADI + u'\u05e7' # 0x67 -> HEBREW LETTER QOF + u'\u05e8' # 0x68 -> HEBREW LETTER RESH + u'\u05e9' # 0x69 -> HEBREW LETTER SHIN + u'\xa6' # 0x6A -> BROKEN BAR + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' 
# 0x6F -> QUESTION MARK + u'\ufffe' # 0x70 -> UNDEFINED + u'\u05ea' # 0x71 -> HEBREW LETTER TAV + u'\ufffe' # 0x72 -> UNDEFINED + u'\ufffe' # 0x73 -> UNDEFINED + u'\xa0' # 0x74 -> NO-BREAK SPACE + u'\ufffe' # 0x75 -> UNDEFINED + u'\ufffe' # 0x76 -> UNDEFINED + u'\ufffe' # 0x77 -> UNDEFINED + u'\u2017' # 0x78 -> DOUBLE LOW LINE + u'`' # 0x79 -> GRAVE ACCENT + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK + u'\ufffe' # 0x80 -> UNDEFINED + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\xb1' # 0x8F -> PLUS-MINUS SIGN + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\ufffe' # 0x9A -> UNDEFINED + u'\ufffe' # 0x9B -> UNDEFINED + u'\ufffe' # 0x9C -> UNDEFINED + u'\xb8' # 0x9D -> CEDILLA + u'\ufffe' # 0x9E -> UNDEFINED + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL 
LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\ufffe' # 0xAA -> UNDEFINED + u'\ufffe' # 0xAB -> UNDEFINED + u'\ufffe' # 0xAC -> UNDEFINED + u'\ufffe' # 0xAD -> UNDEFINED + u'\ufffe' # 0xAE -> UNDEFINED + u'\xae' # 0xAF -> REGISTERED SIGN + u'^' # 0xB0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0xBA -> LEFT SQUARE BRACKET + u']' # 0xBB -> RIGHT SQUARE BRACKET + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\ufffe' # 0xCB -> UNDEFINED + u'\ufffe' # 0xCC -> UNDEFINED + u'\ufffe' # 0xCD -> UNDEFINED + u'\ufffe' # 0xCE -> UNDEFINED + u'\ufffe' # 0xCF -> UNDEFINED + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\ufffe' # 0xDB -> UNDEFINED + u'\ufffe' # 0xDC -> UNDEFINED + u'\ufffe' # 0xDD -> UNDEFINED + u'\ufffe' # 0xDE -> 
UNDEFINED + u'\ufffe' # 0xDF -> UNDEFINED + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\ufffe' # 0xEB -> UNDEFINED + u'\ufffe' # 0xEC -> UNDEFINED + u'\ufffe' # 0xED -> UNDEFINED + u'\ufffe' # 0xEE -> UNDEFINED + u'\ufffe' # 0xEF -> UNDEFINED + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\ufffe' # 0xFB -> UNDEFINED + u'\ufffe' # 0xFC -> UNDEFINED + u'\ufffe' # 0xFD -> UNDEFINED + u'\ufffe' # 0xFE -> UNDEFINED + u'\x9f' # 0xFF -> EIGHT ONES ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 
0x3F, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x5A, # EXCLAMATION MARK - 0x0022: 0x7F, # QUOTATION MARK - 0x0023: 0x7B, # NUMBER SIGN - 0x0024: 0x5B, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, # QUESTION MARK - 0x0040: 0x7C, # COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # 
LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0xBA, # LEFT SQUARE BRACKET - 0x005C: 0xE0, # REVERSE SOLIDUS - 0x005D: 0xBB, # RIGHT SQUARE BRACKET - 0x005E: 0xB0, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x79, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0xC0, # LEFT CURLY BRACKET - 0x007C: 0x4F, # VERTICAL LINE - 0x007D: 0xD0, # RIGHT CURLY BRACKET - 0x007E: 0xA1, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # DIGIT SELECT - 0x0081: 0x21, # START OF SIGNIFICANCE - 0x0082: 0x22, # FIELD SEPARATOR - 0x0083: 0x23, # WORD UNDERSCORE - 0x0084: 0x24, # BYPASS OR INHIBIT PRESENTATION - 0x0085: 0x15, # NEW LINE - 0x0086: 0x06, # REQUIRED NEW LINE - 0x0087: 0x17, # PROGRAM OPERATOR COMMUNICATION - 0x0088: 0x28, # SET ATTRIBUTE - 0x0089: 0x29, # START FIELD EXTENDED - 0x008A: 0x2A, # SET 
MODE OR SWITCH - 0x008B: 0x2B, # CONTROL SEQUENCE PREFIX - 0x008C: 0x2C, # MODIFY FIELD ATTRIBUTE - 0x008D: 0x09, # SUPERSCRIPT - 0x008E: 0x0A, # REPEAT - 0x008F: 0x1B, # CUSTOMER USE ONE - 0x0090: 0x30, # - 0x0091: 0x31, # - 0x0092: 0x1A, # UNIT BACK SPACE - 0x0093: 0x33, # INDEX RETURN - 0x0094: 0x34, # PRESENTATION POSITION - 0x0095: 0x35, # TRANSPARENT - 0x0096: 0x36, # NUMERIC BACKSPACE - 0x0097: 0x08, # GRAPHIC ESCAPE - 0x0098: 0x38, # SUBSCRIPT - 0x0099: 0x39, # INDENT TABULATION - 0x009A: 0x3A, # REVERSE FORM FEED - 0x009B: 0x3B, # CUSTOMER USE THREE - 0x009C: 0x04, # SELECT - 0x009D: 0x14, # RESTORE/ENABLE PRESENTATION - 0x009E: 0x3E, # - 0x009F: 0xFF, # EIGHT ONES - 0x00A0: 0x74, # NO-BREAK SPACE - 0x00A2: 0x4A, # CENT SIGN - 0x00A3: 0xB1, # POUND SIGN - 0x00A4: 0x9F, # CURRENCY SIGN - 0x00A5: 0xB2, # YEN SIGN - 0x00A6: 0x6A, # BROKEN BAR - 0x00A7: 0xB5, # SECTION SIGN - 0x00A8: 0xBD, # DIAERESIS - 0x00A9: 0xB4, # COPYRIGHT SIGN - 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0x5F, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00AE: 0xAF, # REGISTERED SIGN - 0x00AF: 0xBC, # MACRON - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0x8F, # PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 0xBE, # ACUTE ACCENT - 0x00B5: 0xA0, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB3, # MIDDLE DOT - 0x00B8: 0x9D, # CEDILLA - 0x00B9: 0xDA, # SUPERSCRIPT ONE - 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF - 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS - 0x00D7: 0xBF, # MULTIPLICATION SIGN - 0x00F7: 0xE1, # DIVISION SIGN - 0x05D0: 0x41, # HEBREW LETTER ALEF - 0x05D1: 0x42, # HEBREW LETTER BET - 0x05D2: 0x43, # HEBREW LETTER GIMEL - 0x05D3: 0x44, # HEBREW LETTER DALET - 0x05D4: 0x45, # HEBREW LETTER HE - 0x05D5: 0x46, # HEBREW LETTER VAV - 0x05D6: 0x47, # HEBREW LETTER ZAYIN - 0x05D7: 0x48, # HEBREW 
LETTER HET - 0x05D8: 0x49, # HEBREW LETTER TET - 0x05D9: 0x51, # HEBREW LETTER YOD - 0x05DA: 0x52, # HEBREW LETTER FINAL KAF - 0x05DB: 0x53, # HEBREW LETTER KAF - 0x05DC: 0x54, # HEBREW LETTER LAMED - 0x05DD: 0x55, # HEBREW LETTER FINAL MEM - 0x05DE: 0x56, # HEBREW LETTER MEM - 0x05DF: 0x57, # HEBREW LETTER FINAL NUN - 0x05E0: 0x58, # HEBREW LETTER NUN - 0x05E1: 0x59, # HEBREW LETTER SAMEKH - 0x05E2: 0x62, # HEBREW LETTER AYIN - 0x05E3: 0x63, # HEBREW LETTER FINAL PE - 0x05E4: 0x64, # HEBREW LETTER PE - 0x05E5: 0x65, # HEBREW LETTER FINAL TSADI - 0x05E6: 0x66, # HEBREW LETTER TSADI - 0x05E7: 0x67, # HEBREW LETTER QOF - 0x05E8: 0x68, # HEBREW LETTER RESH - 0x05E9: 0x69, # HEBREW LETTER SHIN - 0x05EA: 0x71, # HEBREW LETTER TAV - 0x2017: 0x78, # DOUBLE LOW LINE + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x37, # END OF TRANSMISSION + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, # ACKNOWLEDGE + 0x0007: 0x2F, # BELL + 0x0008: 0x16, # BACKSPACE + 0x0009: 0x05, # HORIZONTAL TABULATION + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x3C, # DEVICE CONTROL FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x32, # SYNCHRONOUS IDLE + 0x0017: 0x26, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x3F, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x40, # SPACE + 0x0021: 0x5A, # EXCLAMATION MARK + 0x0022: 0x7F, # QUOTATION MARK + 0x0023: 0x7B, # NUMBER SIGN + 0x0024: 0x5B, # DOLLAR SIGN + 0x0025: 0x6C, # PERCENT SIGN + 0x0026: 
0x50, # AMPERSAND + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0x7C, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B + 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, # LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 0x0050: 0xD7, # LATIN CAPITAL LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # LATIN CAPITAL LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0xBA, # LEFT SQUARE BRACKET + 0x005C: 0xE0, # REVERSE SOLIDUS + 0x005D: 0xBB, # RIGHT SQUARE BRACKET + 0x005E: 0xB0, # CIRCUMFLEX ACCENT 
+ 0x005F: 0x6D, # LOW LINE + 0x0060: 0x79, # GRAVE ACCENT + 0x0061: 0x81, # LATIN SMALL LETTER A + 0x0062: 0x82, # LATIN SMALL LETTER B + 0x0063: 0x83, # LATIN SMALL LETTER C + 0x0064: 0x84, # LATIN SMALL LETTER D + 0x0065: 0x85, # LATIN SMALL LETTER E + 0x0066: 0x86, # LATIN SMALL LETTER F + 0x0067: 0x87, # LATIN SMALL LETTER G + 0x0068: 0x88, # LATIN SMALL LETTER H + 0x0069: 0x89, # LATIN SMALL LETTER I + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O + 0x0070: 0x97, # LATIN SMALL LETTER P + 0x0071: 0x98, # LATIN SMALL LETTER Q + 0x0072: 0x99, # LATIN SMALL LETTER R + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, # LATIN SMALL LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, # LATIN SMALL LETTER Z + 0x007B: 0xC0, # LEFT CURLY BRACKET + 0x007C: 0x4F, # VERTICAL LINE + 0x007D: 0xD0, # RIGHT CURLY BRACKET + 0x007E: 0xA1, # TILDE + 0x007F: 0x07, # DELETE + 0x0080: 0x20, # DIGIT SELECT + 0x0081: 0x21, # START OF SIGNIFICANCE + 0x0082: 0x22, # FIELD SEPARATOR + 0x0083: 0x23, # WORD UNDERSCORE + 0x0084: 0x24, # BYPASS OR INHIBIT PRESENTATION + 0x0085: 0x15, # NEW LINE + 0x0086: 0x06, # REQUIRED NEW LINE + 0x0087: 0x17, # PROGRAM OPERATOR COMMUNICATION + 0x0088: 0x28, # SET ATTRIBUTE + 0x0089: 0x29, # START FIELD EXTENDED + 0x008A: 0x2A, # SET MODE OR SWITCH + 0x008B: 0x2B, # CONTROL SEQUENCE PREFIX + 0x008C: 0x2C, # MODIFY FIELD ATTRIBUTE + 0x008D: 0x09, # SUPERSCRIPT + 0x008E: 0x0A, # REPEAT + 0x008F: 0x1B, # CUSTOMER USE ONE + 0x0090: 0x30, # + 0x0091: 0x31, # + 0x0092: 0x1A, # UNIT BACK SPACE + 0x0093: 0x33, # INDEX RETURN + 0x0094: 0x34, # PRESENTATION POSITION + 0x0095: 0x35, # TRANSPARENT + 0x0096: 0x36, # 
NUMERIC BACKSPACE + 0x0097: 0x08, # GRAPHIC ESCAPE + 0x0098: 0x38, # SUBSCRIPT + 0x0099: 0x39, # INDENT TABULATION + 0x009A: 0x3A, # REVERSE FORM FEED + 0x009B: 0x3B, # CUSTOMER USE THREE + 0x009C: 0x04, # SELECT + 0x009D: 0x14, # RESTORE/ENABLE PRESENTATION + 0x009E: 0x3E, # + 0x009F: 0xFF, # EIGHT ONES + 0x00A0: 0x74, # NO-BREAK SPACE + 0x00A2: 0x4A, # CENT SIGN + 0x00A3: 0xB1, # POUND SIGN + 0x00A4: 0x9F, # CURRENCY SIGN + 0x00A5: 0xB2, # YEN SIGN + 0x00A6: 0x6A, # BROKEN BAR + 0x00A7: 0xB5, # SECTION SIGN + 0x00A8: 0xBD, # DIAERESIS + 0x00A9: 0xB4, # COPYRIGHT SIGN + 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0x5F, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00AE: 0xAF, # REGISTERED SIGN + 0x00AF: 0xBC, # MACRON + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0x8F, # PLUS-MINUS SIGN + 0x00B2: 0xEA, # SUPERSCRIPT TWO + 0x00B3: 0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xBE, # ACUTE ACCENT + 0x00B5: 0xA0, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB3, # MIDDLE DOT + 0x00B8: 0x9D, # CEDILLA + 0x00B9: 0xDA, # SUPERSCRIPT ONE + 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF + 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS + 0x00D7: 0xBF, # MULTIPLICATION SIGN + 0x00F7: 0xE1, # DIVISION SIGN + 0x05D0: 0x41, # HEBREW LETTER ALEF + 0x05D1: 0x42, # HEBREW LETTER BET + 0x05D2: 0x43, # HEBREW LETTER GIMEL + 0x05D3: 0x44, # HEBREW LETTER DALET + 0x05D4: 0x45, # HEBREW LETTER HE + 0x05D5: 0x46, # HEBREW LETTER VAV + 0x05D6: 0x47, # HEBREW LETTER ZAYIN + 0x05D7: 0x48, # HEBREW LETTER HET + 0x05D8: 0x49, # HEBREW LETTER TET + 0x05D9: 0x51, # HEBREW LETTER YOD + 0x05DA: 0x52, # HEBREW LETTER FINAL KAF + 0x05DB: 0x53, # HEBREW LETTER KAF + 0x05DC: 0x54, # HEBREW LETTER LAMED + 0x05DD: 0x55, # HEBREW LETTER FINAL MEM + 0x05DE: 0x56, # HEBREW LETTER MEM + 0x05DF: 0x57, # HEBREW LETTER FINAL NUN + 0x05E0: 0x58, # HEBREW LETTER NUN + 0x05E1: 0x59, # HEBREW 
LETTER SAMEKH + 0x05E2: 0x62, # HEBREW LETTER AYIN + 0x05E3: 0x63, # HEBREW LETTER FINAL PE + 0x05E4: 0x64, # HEBREW LETTER PE + 0x05E5: 0x65, # HEBREW LETTER FINAL TSADI + 0x05E6: 0x66, # HEBREW LETTER TSADI + 0x05E7: 0x67, # HEBREW LETTER QOF + 0x05E8: 0x68, # HEBREW LETTER RESH + 0x05E9: 0x69, # HEBREW LETTER SHIN + 0x05EA: 0x71, # HEBREW LETTER TAV + 0x2017: 0x78, # DOUBLE LOW LINE } - Modified: python/branches/ssize_t/Lib/encodings/cp437.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp437.py (original) +++ python/branches/ssize_t/Lib/encodings/cp437.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL 
LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 
0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # 
UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH 
CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00a5, # YEN SIGN + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE 
AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - 
u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xa5' # 0x009d -> YEN SIGN - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT 
DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - 
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL 
TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xa5' # 0x009d -> YEN SIGN + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT 
DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + 
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE 
CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 
0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE 
- 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a5: 0x009d, # YEN SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 
0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX 
DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL 
DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 
0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL 
LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a5: 0x009d, # YEN SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # 
LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 
0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 
0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp500.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp500.py (original) +++ python/branches/ssize_t/Lib/encodings/cp500.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): 
pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - 
u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'[' # 0x4A -> LEFT SQUARE BRACKET - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'!' # 0x4F -> EXCLAMATION MARK - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u']' # 0x5A -> RIGHT SQUARE BRACKET - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'^' # 0x5F -> CIRCUMFLEX ACCENT - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> 
COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' # 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> 
MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9D -> CEDILLA - u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xAF -> REGISTERED SIGN - u'\xa2' # 0xB0 -> CENT SIGN - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'\xac' # 0xBA -> NOT SIGN - u'|' # 0xBB -> VERTICAL LINE - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE 
-> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 
0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> CONTROL + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> CONTROL + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> CONTROL + u'\x8d' # 0x09 -> CONTROL + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> CONTROL + u'\x85' # 0x15 -> CONTROL + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> CONTROL + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> CONTROL + u'\x81' # 0x21 -> CONTROL + u'\x82' # 0x22 -> CONTROL + u'\x83' # 0x23 -> CONTROL + u'\x84' # 0x24 -> CONTROL + u'\n' # 0x25 -> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> CONTROL + u'\x89' # 0x29 -> CONTROL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> CONTROL + u'\x91' # 0x31 -> CONTROL + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> CONTROL + u'\x94' # 0x34 -> CONTROL + u'\x95' # 0x35 -> CONTROL + u'\x96' # 0x36 -> CONTROL + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> CONTROL + u'\x99' # 0x39 -> CONTROL + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE 
ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\xa0' # 0x41 -> NO-BREAK SPACE + u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE + u'[' # 0x4A -> LEFT SQUARE BRACKET + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'!' # 0x4F -> EXCLAMATION MARK + u'&' # 0x50 -> AMPERSAND + u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE + u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE + u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) + u']' # 0x5A -> RIGHT SQUARE BRACKET + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'^' # 0x5F -> CIRCUMFLEX ACCENT + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xa6' # 0x6A -> BROKEN BAR + u',' 
# 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' # 0x6F -> QUESTION MARK + u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE + u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE + u'`' # 0x79 -> GRAVE ACCENT + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK + u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) + u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) + u'\xb1' # 0x8F -> PLUS-MINUS SIGN + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR + u'\xba' # 
0x9B -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9D -> CEDILLA + u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) + u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) + u'\xae' # 0xAF -> REGISTERED SIGN + u'\xa2' # 0xB0 -> CENT SIGN + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'\xac' # 0xBA -> NOT SIGN + u'|' # 0xBB -> VERTICAL LINE + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' 
# 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE + 
u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x3F, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x4F, # EXCLAMATION MARK - 0x0022: 0x7F, # QUOTATION MARK - 0x0023: 0x7B, # NUMBER SIGN - 0x0024: 0x5B, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN 
SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, # QUESTION MARK - 0x0040: 0x7C, # COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0x4A, # LEFT SQUARE BRACKET - 0x005C: 0xE0, # REVERSE SOLIDUS - 0x005D: 0x5A, # RIGHT SQUARE BRACKET - 0x005E: 0x5F, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x79, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 
0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0xC0, # LEFT CURLY BRACKET - 0x007C: 0xBB, # VERTICAL LINE - 0x007D: 0xD0, # RIGHT CURLY BRACKET - 0x007E: 0xA1, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # CONTROL - 0x0081: 0x21, # CONTROL - 0x0082: 0x22, # CONTROL - 0x0083: 0x23, # CONTROL - 0x0084: 0x24, # CONTROL - 0x0085: 0x15, # CONTROL - 0x0086: 0x06, # CONTROL - 0x0087: 0x17, # CONTROL - 0x0088: 0x28, # CONTROL - 0x0089: 0x29, # CONTROL - 0x008A: 0x2A, # CONTROL - 0x008B: 0x2B, # CONTROL - 0x008C: 0x2C, # CONTROL - 0x008D: 0x09, # CONTROL - 0x008E: 0x0A, # CONTROL - 0x008F: 0x1B, # CONTROL - 0x0090: 0x30, # CONTROL - 0x0091: 0x31, # CONTROL - 0x0092: 0x1A, # CONTROL - 0x0093: 0x33, # CONTROL - 0x0094: 0x34, # CONTROL - 0x0095: 0x35, # CONTROL - 0x0096: 0x36, # CONTROL - 0x0097: 0x08, # CONTROL - 0x0098: 0x38, # CONTROL - 0x0099: 0x39, # CONTROL - 0x009A: 0x3A, # CONTROL - 0x009B: 0x3B, # CONTROL - 0x009C: 0x04, # CONTROL - 0x009D: 0x14, # CONTROL - 0x009E: 0x3E, # CONTROL - 0x009F: 0xFF, # CONTROL - 0x00A0: 0x41, # NO-BREAK SPACE - 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK - 0x00A2: 0xB0, # CENT SIGN - 0x00A3: 0xB1, # POUND SIGN - 0x00A4: 0x9F, # CURRENCY SIGN - 0x00A5: 0xB2, # YEN SIGN - 0x00A6: 0x6A, # BROKEN BAR - 0x00A7: 0xB5, # SECTION SIGN - 0x00A8: 0xBD, # DIAERESIS - 0x00A9: 0xB4, # COPYRIGHT SIGN - 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR - 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xBA, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00AE: 0xAF, # REGISTERED SIGN - 0x00AF: 0xBC, # MACRON - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0x8F, # 
PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 0xBE, # ACUTE ACCENT - 0x00B5: 0xA0, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB3, # MIDDLE DOT - 0x00B8: 0x9D, # CEDILLA - 0x00B9: 0xDA, # SUPERSCRIPT ONE - 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF - 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xAB, # INVERTED QUESTION MARK - 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE - 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) - 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xBF, # MULTIPLICATION SIGN - 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xFB, # 
LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) - 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE - 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) - 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xE1, # DIVISION SIGN - 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) - 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 
0x03, # END OF TEXT + 0x0004: 0x37, # END OF TRANSMISSION + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, # ACKNOWLEDGE + 0x0007: 0x2F, # BELL + 0x0008: 0x16, # BACKSPACE + 0x0009: 0x05, # HORIZONTAL TABULATION + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x3C, # DEVICE CONTROL FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x32, # SYNCHRONOUS IDLE + 0x0017: 0x26, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x3F, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x40, # SPACE + 0x0021: 0x4F, # EXCLAMATION MARK + 0x0022: 0x7F, # QUOTATION MARK + 0x0023: 0x7B, # NUMBER SIGN + 0x0024: 0x5B, # DOLLAR SIGN + 0x0025: 0x6C, # PERCENT SIGN + 0x0026: 0x50, # AMPERSAND + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0x7C, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B 
+ 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, # LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 0x0050: 0xD7, # LATIN CAPITAL LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # LATIN CAPITAL LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0x4A, # LEFT SQUARE BRACKET + 0x005C: 0xE0, # REVERSE SOLIDUS + 0x005D: 0x5A, # RIGHT SQUARE BRACKET + 0x005E: 0x5F, # CIRCUMFLEX ACCENT + 0x005F: 0x6D, # LOW LINE + 0x0060: 0x79, # GRAVE ACCENT + 0x0061: 0x81, # LATIN SMALL LETTER A + 0x0062: 0x82, # LATIN SMALL LETTER B + 0x0063: 0x83, # LATIN SMALL LETTER C + 0x0064: 0x84, # LATIN SMALL LETTER D + 0x0065: 0x85, # LATIN SMALL LETTER E + 0x0066: 0x86, # LATIN SMALL LETTER F + 0x0067: 0x87, # LATIN SMALL LETTER G + 0x0068: 0x88, # LATIN SMALL LETTER H + 0x0069: 0x89, # LATIN SMALL LETTER I + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O + 0x0070: 0x97, # LATIN SMALL LETTER P + 0x0071: 0x98, # LATIN SMALL LETTER Q + 0x0072: 0x99, # LATIN SMALL LETTER R + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, # LATIN SMALL 
LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, # LATIN SMALL LETTER Z + 0x007B: 0xC0, # LEFT CURLY BRACKET + 0x007C: 0xBB, # VERTICAL LINE + 0x007D: 0xD0, # RIGHT CURLY BRACKET + 0x007E: 0xA1, # TILDE + 0x007F: 0x07, # DELETE + 0x0080: 0x20, # CONTROL + 0x0081: 0x21, # CONTROL + 0x0082: 0x22, # CONTROL + 0x0083: 0x23, # CONTROL + 0x0084: 0x24, # CONTROL + 0x0085: 0x15, # CONTROL + 0x0086: 0x06, # CONTROL + 0x0087: 0x17, # CONTROL + 0x0088: 0x28, # CONTROL + 0x0089: 0x29, # CONTROL + 0x008A: 0x2A, # CONTROL + 0x008B: 0x2B, # CONTROL + 0x008C: 0x2C, # CONTROL + 0x008D: 0x09, # CONTROL + 0x008E: 0x0A, # CONTROL + 0x008F: 0x1B, # CONTROL + 0x0090: 0x30, # CONTROL + 0x0091: 0x31, # CONTROL + 0x0092: 0x1A, # CONTROL + 0x0093: 0x33, # CONTROL + 0x0094: 0x34, # CONTROL + 0x0095: 0x35, # CONTROL + 0x0096: 0x36, # CONTROL + 0x0097: 0x08, # CONTROL + 0x0098: 0x38, # CONTROL + 0x0099: 0x39, # CONTROL + 0x009A: 0x3A, # CONTROL + 0x009B: 0x3B, # CONTROL + 0x009C: 0x04, # CONTROL + 0x009D: 0x14, # CONTROL + 0x009E: 0x3E, # CONTROL + 0x009F: 0xFF, # CONTROL + 0x00A0: 0x41, # NO-BREAK SPACE + 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK + 0x00A2: 0xB0, # CENT SIGN + 0x00A3: 0xB1, # POUND SIGN + 0x00A4: 0x9F, # CURRENCY SIGN + 0x00A5: 0xB2, # YEN SIGN + 0x00A6: 0x6A, # BROKEN BAR + 0x00A7: 0xB5, # SECTION SIGN + 0x00A8: 0xBD, # DIAERESIS + 0x00A9: 0xB4, # COPYRIGHT SIGN + 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR + 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xBA, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00AE: 0xAF, # REGISTERED SIGN + 0x00AF: 0xBC, # MACRON + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0x8F, # PLUS-MINUS SIGN + 0x00B2: 0xEA, # SUPERSCRIPT TWO + 0x00B3: 0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xBE, # ACUTE ACCENT + 0x00B5: 0xA0, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB3, # MIDDLE DOT + 0x00B8: 0x9D, # 
CEDILLA + 0x00B9: 0xDA, # SUPERSCRIPT ONE + 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF + 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xAB, # INVERTED QUESTION MARK + 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE + 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) + 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xBF, # MULTIPLICATION SIGN + 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) + 0x00DF: 0x59, # 
LATIN SMALL LETTER SHARP S (GERMAN) + 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE + 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xE1, # DIVISION SIGN + 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS } - Modified: python/branches/ssize_t/Lib/encodings/cp737.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp737.py (original) +++ python/branches/ssize_t/Lib/encodings/cp737.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def 
decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA - 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA - 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA - 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x008b: 0x039c, # GREEK CAPITAL LETTER MU - 0x008c: 0x039d, # GREEK CAPITAL LETTER NU - 0x008d: 0x039e, # GREEK CAPITAL LETTER XI - 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI - 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x0099: 0x03b2, # GREEK SMALL LETTER BETA - 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA - 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA - 0x009e: 0x03b7, # GREEK SMALL LETTER ETA - 0x009f: 0x03b8, # GREEK SMALL LETTER THETA - 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00a3: 0x03bc, # GREEK SMALL LETTER MU - 0x00a4: 0x03bd, # GREEK SMALL LETTER NU - 0x00a5: 0x03be, # GREEK SMALL LETTER XI - 0x00a6: 0x03bf, # GREEK 
SMALL LETTER OMICRON - 0x00a7: 0x03c0, # GREEK SMALL LETTER PI - 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO - 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI - 0x00af: 0x03c8, # GREEK SMALL LETTER PSI - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE 
VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00f0: 0x038f, # 
GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA + 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA + 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA + 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x008b: 0x039c, # GREEK CAPITAL LETTER MU + 0x008c: 0x039d, # GREEK CAPITAL LETTER NU + 0x008d: 0x039e, # GREEK CAPITAL LETTER XI + 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI + 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x0099: 0x03b2, # GREEK SMALL LETTER BETA + 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA + 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x009d: 0x03b6, # 
GREEK SMALL LETTER ZETA + 0x009e: 0x03b7, # GREEK SMALL LETTER ETA + 0x009f: 0x03b8, # GREEK SMALL LETTER THETA + 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00a3: 0x03bc, # GREEK SMALL LETTER MU + 0x00a4: 0x03bd, # GREEK SMALL LETTER NU + 0x00a5: 0x03be, # GREEK SMALL LETTER XI + 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00a7: 0x03c0, # GREEK SMALL LETTER PI + 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO + 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU + 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI + 0x00af: 0x03c8, # GREEK SMALL LETTER PSI + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00e9: 0x03ce, # GREEK SMALL 
LETTER OMEGA WITH TONOS + 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - 
u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0391' # 0x0080 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0x0081 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0x0082 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0x0083 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0x0084 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0x0085 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0x0086 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0x0087 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0x0088 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0x0089 -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0x008a -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0x008b -> GREEK CAPITAL LETTER MU - u'\u039d' # 0x008c -> GREEK CAPITAL LETTER NU - u'\u039e' # 0x008d -> GREEK CAPITAL LETTER XI - u'\u039f' # 0x008e -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0x008f -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0x0090 -> GREEK CAPITAL LETTER RHO - u'\u03a3' # 0x0091 -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0x0092 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0x0093 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0x0094 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0x0095 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0x0096 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0x0097 -> GREEK CAPITAL LETTER OMEGA - u'\u03b1' # 0x0098 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0x0099 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0x009a -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0x009b -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0x009c -> 
GREEK SMALL LETTER EPSILON - u'\u03b6' # 0x009d -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0x009e -> GREEK SMALL LETTER ETA - u'\u03b8' # 0x009f -> GREEK SMALL LETTER THETA - u'\u03b9' # 0x00a0 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0x00a1 -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0x00a2 -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0x00a3 -> GREEK SMALL LETTER MU - u'\u03bd' # 0x00a4 -> GREEK SMALL LETTER NU - u'\u03be' # 0x00a5 -> GREEK SMALL LETTER XI - u'\u03bf' # 0x00a6 -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0x00a7 -> GREEK SMALL LETTER PI - u'\u03c1' # 0x00a8 -> GREEK SMALL LETTER RHO - u'\u03c3' # 0x00a9 -> GREEK SMALL LETTER SIGMA - u'\u03c2' # 0x00aa -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c4' # 0x00ab -> GREEK SMALL LETTER TAU - u'\u03c5' # 0x00ac -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0x00ad -> GREEK SMALL LETTER PHI - u'\u03c7' # 0x00ae -> GREEK SMALL LETTER CHI - u'\u03c8' # 0x00af -> GREEK SMALL LETTER PSI - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS 
LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03c9' # 0x00e0 -> GREEK SMALL LETTER OMEGA - u'\u03ac' # 0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0x00e3 -> GREEK SMALL LETTER ETA 
WITH TONOS - u'\u03ca' # 0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03af' # 0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03cc' # 0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03cb' # 0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03ce' # 0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u0386' # 0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\u0388' # 0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u038c' # 0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\u038e' # 0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u03aa' # 0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' 
# 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0391' # 0x0080 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x0081 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x0082 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x0083 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x0084 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x0085 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x0086 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0x0087 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x0088 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0x0089 -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x008a -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x008b -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x008c -> GREEK CAPITAL LETTER NU + u'\u039e' # 0x008d -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x008e -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0x008f -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x0090 -> GREEK CAPITAL LETTER RHO + u'\u03a3' # 0x0091 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x0092 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x0093 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x0094 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x0095 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x0096 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x0097 -> GREEK CAPITAL LETTER OMEGA + u'\u03b1' # 0x0098 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x0099 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x009a -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0x009b -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x009c -> 
GREEK SMALL LETTER EPSILON + u'\u03b6' # 0x009d -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x009e -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x009f -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00a0 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00a1 -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00a2 -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00a3 -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00a4 -> GREEK SMALL LETTER NU + u'\u03be' # 0x00a5 -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00a6 -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00a7 -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00a8 -> GREEK SMALL LETTER RHO + u'\u03c3' # 0x00a9 -> GREEK SMALL LETTER SIGMA + u'\u03c2' # 0x00aa -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c4' # 0x00ab -> GREEK SMALL LETTER TAU + u'\u03c5' # 0x00ac -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00ad -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00ae -> GREEK SMALL LETTER CHI + u'\u03c8' # 0x00af -> GREEK SMALL LETTER PSI + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS 
LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03c9' # 0x00e0 -> GREEK SMALL LETTER OMEGA + u'\u03ac' # 0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x00e3 -> GREEK SMALL LETTER ETA 
WITH TONOS + u'\u03ca' # 0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03af' # 0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03cb' # 0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03ce' # 0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u0386' # 0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\u0388' # 0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u038c' # 0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\u038e' # 0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u03aa' # 0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT 
OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL 
LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 
0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00f7: 0x00f6, # DIVISION SIGN - 0x0386: 0x00ea, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0x00eb, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0x00ec, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038a: 0x00ed, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038c: 0x00ee, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038e: 0x00ef, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038f: 0x00f0, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0391: 0x0080, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0x0081, # GREEK CAPITAL LETTER BETA - 0x0393: 0x0082, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0x0083, # GREEK CAPITAL LETTER DELTA - 0x0395: 0x0084, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0x0085, # GREEK CAPITAL LETTER ZETA - 0x0397: 0x0086, # GREEK CAPITAL LETTER ETA - 0x0398: 0x0087, # GREEK CAPITAL LETTER THETA - 0x0399: 0x0088, # GREEK CAPITAL LETTER IOTA - 0x039a: 0x0089, # GREEK CAPITAL LETTER KAPPA - 0x039b: 0x008a, # GREEK CAPITAL LETTER LAMDA - 0x039c: 0x008b, # GREEK CAPITAL LETTER MU - 0x039d: 0x008c, # GREEK CAPITAL LETTER NU - 0x039e: 0x008d, # GREEK CAPITAL LETTER XI - 0x039f: 0x008e, # GREEK CAPITAL LETTER OMICRON - 0x03a0: 0x008f, # GREEK CAPITAL LETTER PI - 0x03a1: 0x0090, # GREEK CAPITAL LETTER RHO - 0x03a3: 0x0091, # GREEK CAPITAL LETTER SIGMA - 0x03a4: 0x0092, # GREEK CAPITAL LETTER TAU - 0x03a5: 0x0093, # GREEK CAPITAL LETTER UPSILON - 0x03a6: 0x0094, # GREEK CAPITAL LETTER PHI - 0x03a7: 0x0095, # GREEK CAPITAL LETTER CHI - 0x03a8: 0x0096, # GREEK CAPITAL LETTER PSI - 0x03a9: 0x0097, # GREEK CAPITAL LETTER OMEGA - 0x03aa: 0x00f4, # GREEK CAPITAL LETTER 
IOTA WITH DIALYTIKA - 0x03ab: 0x00f5, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03ac: 0x00e1, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03ad: 0x00e2, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03ae: 0x00e3, # GREEK SMALL LETTER ETA WITH TONOS - 0x03af: 0x00e5, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03b1: 0x0098, # GREEK SMALL LETTER ALPHA - 0x03b2: 0x0099, # GREEK SMALL LETTER BETA - 0x03b3: 0x009a, # GREEK SMALL LETTER GAMMA - 0x03b4: 0x009b, # GREEK SMALL LETTER DELTA - 0x03b5: 0x009c, # GREEK SMALL LETTER EPSILON - 0x03b6: 0x009d, # GREEK SMALL LETTER ZETA - 0x03b7: 0x009e, # GREEK SMALL LETTER ETA - 0x03b8: 0x009f, # GREEK SMALL LETTER THETA - 0x03b9: 0x00a0, # GREEK SMALL LETTER IOTA - 0x03ba: 0x00a1, # GREEK SMALL LETTER KAPPA - 0x03bb: 0x00a2, # GREEK SMALL LETTER LAMDA - 0x03bc: 0x00a3, # GREEK SMALL LETTER MU - 0x03bd: 0x00a4, # GREEK SMALL LETTER NU - 0x03be: 0x00a5, # GREEK SMALL LETTER XI - 0x03bf: 0x00a6, # GREEK SMALL LETTER OMICRON - 0x03c0: 0x00a7, # GREEK SMALL LETTER PI - 0x03c1: 0x00a8, # GREEK SMALL LETTER RHO - 0x03c2: 0x00aa, # GREEK SMALL LETTER FINAL SIGMA - 0x03c3: 0x00a9, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00ab, # GREEK SMALL LETTER TAU - 0x03c5: 0x00ac, # GREEK SMALL LETTER UPSILON - 0x03c6: 0x00ad, # GREEK SMALL LETTER PHI - 0x03c7: 0x00ae, # GREEK SMALL LETTER CHI - 0x03c8: 0x00af, # GREEK SMALL LETTER PSI - 0x03c9: 0x00e0, # GREEK SMALL LETTER OMEGA - 0x03ca: 0x00e4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03cb: 0x00e8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03cc: 0x00e6, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03cd: 0x00e7, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03ce: 0x00e9, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT 
HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 
- 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 
0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # 
REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00f7: 0x00f6, # DIVISION SIGN + 0x0386: 0x00ea, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x00eb, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x00ec, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x00ed, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x00ee, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x00ef, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x00f0, # GREEK CAPITAL LETTER OMEGA WITH 
TONOS + 0x0391: 0x0080, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x0081, # GREEK CAPITAL LETTER BETA + 0x0393: 0x0082, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x0083, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x0084, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x0085, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x0086, # GREEK CAPITAL LETTER ETA + 0x0398: 0x0087, # GREEK CAPITAL LETTER THETA + 0x0399: 0x0088, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x0089, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x008a, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x008b, # GREEK CAPITAL LETTER MU + 0x039d: 0x008c, # GREEK CAPITAL LETTER NU + 0x039e: 0x008d, # GREEK CAPITAL LETTER XI + 0x039f: 0x008e, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x008f, # GREEK CAPITAL LETTER PI + 0x03a1: 0x0090, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x0091, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x0092, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x0093, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x0094, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x0095, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x0096, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x0097, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x00f4, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x00f5, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x00e1, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x00e2, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x00e3, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x00e5, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b1: 0x0098, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x0099, # GREEK SMALL LETTER BETA + 0x03b3: 0x009a, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x009b, # GREEK SMALL LETTER DELTA + 0x03b5: 0x009c, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x009d, # GREEK SMALL LETTER ZETA + 0x03b7: 0x009e, # GREEK SMALL LETTER ETA + 0x03b8: 0x009f, # GREEK SMALL LETTER THETA + 0x03b9: 0x00a0, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00a1, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00a2, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00a3, # GREEK SMALL LETTER MU + 
0x03bd: 0x00a4, # GREEK SMALL LETTER NU + 0x03be: 0x00a5, # GREEK SMALL LETTER XI + 0x03bf: 0x00a6, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00a7, # GREEK SMALL LETTER PI + 0x03c1: 0x00a8, # GREEK SMALL LETTER RHO + 0x03c2: 0x00aa, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00a9, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00ab, # GREEK SMALL LETTER TAU + 0x03c5: 0x00ac, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00ad, # GREEK SMALL LETTER PHI + 0x03c7: 0x00ae, # GREEK SMALL LETTER CHI + 0x03c8: 0x00af, # GREEK SMALL LETTER PSI + 0x03c9: 0x00e0, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00e4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00e8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00e6, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00e7, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00e9, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 
0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp775.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp775.py (original) +++ 
python/branches/ssize_t/Lib/encodings/cp775.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0096: 0x00a2, # CENT SIGN - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 
0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00a7: 0x00a6, # BROKEN BAR - 0x00a8: 0x00a9, # COPYRIGHT SIGN - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00c7: 0x016a, # LATIN CAPITAL 
LETTER U WITH MACRON - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00ee: 0x0145, # LATIN 
CAPITAL LETTER N WITH CEDILLA - 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0096: 0x00a2, # CENT SIGN + 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x0099: 
0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x00a4, # CURRENCY SIGN + 0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00a7: 0x00a6, # BROKEN BAR + 0x00a8: 0x00a9, # COPYRIGHT SIGN + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND 
HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00e8: 0x0136, # LATIN 
CAPITAL LETTER K WITH CEDILLA + 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 
0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0106' # 0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0101' # 0x0083 -> LATIN SMALL LETTER A WITH MACRON - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u0123' # 0x0085 -> LATIN SMALL LETTER G WITH CEDILLA - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\u0107' # 0x0087 -> LATIN SMALL LETTER C WITH ACUTE - u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE - u'\u0113' # 0x0089 -> LATIN SMALL LETTER E WITH MACRON - u'\u0156' # 0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\u0157' # 0x008b -> LATIN SMALL LETTER R WITH CEDILLA - u'\u012b' # 0x008c -> LATIN SMALL LETTER I WITH MACRON - u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\u014d' # 0x0093 -> LATIN SMALL LETTER O WITH MACRON - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u0122' # 0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\xa2' # 0x0096 -> CENT SIGN - u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE - u'\xd6' # 0x0099 -> LATIN 
CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd7' # 0x009e -> MULTIPLICATION SIGN - u'\xa4' # 0x009f -> CURRENCY SIGN - u'\u0100' # 0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u012a' # 0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\u017b' # 0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017c' # 0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u017a' # 0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE - u'\u201d' # 0x00a6 -> RIGHT DOUBLE QUOTATION MARK - u'\xa6' # 0x00a7 -> BROKEN BAR - u'\xa9' # 0x00a8 -> COPYRIGHT SIGN - u'\xae' # 0x00a9 -> REGISTERED SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\u0141' # 0x00ad -> LATIN CAPITAL LETTER L WITH STROKE - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u0104' # 0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u010c' # 0x00b6 -> LATIN CAPITAL LETTER C WITH CARON - u'\u0118' # 0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0116' # 0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u012e' # 0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u0160' # 0x00be -> LATIN CAPITAL LETTER S WITH CARON - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND 
LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u0172' # 0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u016a' # 0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u017d' # 0x00cf -> LATIN CAPITAL LETTER Z WITH CARON - u'\u0105' # 0x00d0 -> LATIN SMALL LETTER A WITH OGONEK - u'\u010d' # 0x00d1 -> LATIN SMALL LETTER C WITH CARON - u'\u0119' # 0x00d2 -> LATIN SMALL LETTER E WITH OGONEK - u'\u0117' # 0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\u012f' # 0x00d4 -> LATIN SMALL LETTER I WITH OGONEK - u'\u0161' # 0x00d5 -> LATIN SMALL LETTER S WITH CARON - u'\u0173' # 0x00d6 -> LATIN SMALL LETTER U WITH OGONEK - u'\u016b' # 0x00d7 -> LATIN SMALL LETTER U WITH MACRON - u'\u017e' # 0x00d8 -> LATIN SMALL LETTER Z WITH CARON - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'\u014c' # 0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON - u'\u0143' # 0x00e3 -> LATIN 
CAPITAL LETTER N WITH ACUTE - u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE - u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u0144' # 0x00e7 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0136' # 0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u0137' # 0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA - u'\u013b' # 0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u013c' # 0x00eb -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0146' # 0x00ec -> LATIN SMALL LETTER N WITH CEDILLA - u'\u0112' # 0x00ed -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0145' # 0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u2019' # 0x00ef -> RIGHT SINGLE QUOTATION MARK - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u201c' # 0x00f2 -> LEFT DOUBLE QUOTATION MARK - u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0x00f4 -> PILCROW SIGN - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u201e' # 0x00f7 -> DOUBLE LOW-9 QUOTATION MARK - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\xb9' # 0x00fb -> SUPERSCRIPT ONE - u'\xb3' # 0x00fc -> SUPERSCRIPT THREE - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' 
# 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0106' # 0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0101' # 0x0083 -> LATIN SMALL LETTER A WITH MACRON + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0123' # 0x0085 -> LATIN SMALL LETTER G WITH CEDILLA + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0107' # 0x0087 -> LATIN SMALL LETTER C WITH ACUTE + u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE + u'\u0113' # 0x0089 -> LATIN SMALL LETTER E WITH MACRON + u'\u0156' # 0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\u0157' # 0x008b -> LATIN SMALL LETTER R WITH CEDILLA + u'\u012b' # 0x008c -> LATIN SMALL LETTER I WITH MACRON + u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\u014d' # 0x0093 -> LATIN SMALL LETTER O WITH MACRON + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u0122' # 0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\xa2' # 0x0096 -> CENT SIGN + u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE + u'\xd6' # 0x0099 -> LATIN 
CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\xa4' # 0x009f -> CURRENCY SIGN + u'\u0100' # 0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u012a' # 0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\u017b' # 0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017c' # 0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017a' # 0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE + u'\u201d' # 0x00a6 -> RIGHT DOUBLE QUOTATION MARK + u'\xa6' # 0x00a7 -> BROKEN BAR + u'\xa9' # 0x00a8 -> COPYRIGHT SIGN + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\u0141' # 0x00ad -> LATIN CAPITAL LETTER L WITH STROKE + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u0104' # 0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u010c' # 0x00b6 -> LATIN CAPITAL LETTER C WITH CARON + u'\u0118' # 0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0116' # 0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u012e' # 0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0160' # 0x00be -> LATIN CAPITAL LETTER S WITH CARON + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND 
LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u0172' # 0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u016a' # 0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u017d' # 0x00cf -> LATIN CAPITAL LETTER Z WITH CARON + u'\u0105' # 0x00d0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u010d' # 0x00d1 -> LATIN SMALL LETTER C WITH CARON + u'\u0119' # 0x00d2 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0117' # 0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u012f' # 0x00d4 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0161' # 0x00d5 -> LATIN SMALL LETTER S WITH CARON + u'\u0173' # 0x00d6 -> LATIN SMALL LETTER U WITH OGONEK + u'\u016b' # 0x00d7 -> LATIN SMALL LETTER U WITH MACRON + u'\u017e' # 0x00d8 -> LATIN SMALL LETTER Z WITH CARON + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'\u014c' # 0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\u0143' # 0x00e3 -> LATIN 
CAPITAL LETTER N WITH ACUTE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u0144' # 0x00e7 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0136' # 0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u0137' # 0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA + u'\u013b' # 0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u013c' # 0x00eb -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0146' # 0x00ec -> LATIN SMALL LETTER N WITH CEDILLA + u'\u0112' # 0x00ed -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0145' # 0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u2019' # 0x00ef -> RIGHT SINGLE QUOTATION MARK + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u201c' # 0x00f2 -> LEFT DOUBLE QUOTATION MARK + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u201e' # 0x00f7 -> DOUBLE LOW-9 QUOTATION MARK + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 
0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 
0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 
0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a2: 0x0096, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x009f, # CURRENCY SIGN - 0x00a6: 0x00a7, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a9: 0x00a8, # COPYRIGHT SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00ae: 0x00a9, # REGISTERED SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00fc, # SUPERSCRIPT THREE - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x00f4, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b9: 0x00fb, # SUPERSCRIPT ONE - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x009e, # MULTIPLICATION SIGN - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH 
STROKE - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0x00a0, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0x0083, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0x00b5, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0x00d0, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0x0080, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0x0087, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0x00b6, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0x00d1, # LATIN SMALL LETTER C WITH CARON - 0x0112: 0x00ed, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0x0089, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0x00b8, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0x00d3, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0x00b7, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0x00d2, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0x0095, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0x0085, # LATIN SMALL LETTER G WITH CEDILLA - 0x012a: 0x00a1, # LATIN CAPITAL LETTER I WITH MACRON - 0x012b: 0x008c, # LATIN SMALL LETTER I WITH MACRON - 0x012e: 0x00bd, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012f: 0x00d4, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0x00e8, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0x00e9, # LATIN SMALL LETTER K WITH CEDILLA - 0x013b: 0x00ea, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013c: 0x00eb, # LATIN SMALL LETTER L WITH CEDILLA - 0x0141: 0x00ad, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0x00e7, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0x00ee, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0x00ec, # LATIN SMALL LETTER N WITH CEDILLA - 0x014c: 0x00e2, # LATIN CAPITAL LETTER O WITH MACRON - 0x014d: 0x0093, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0x008a, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0x008b, # LATIN SMALL LETTER R WITH CEDILLA - 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0x0098, # LATIN SMALL 
LETTER S WITH ACUTE - 0x0160: 0x00be, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x00d5, # LATIN SMALL LETTER S WITH CARON - 0x016a: 0x00c7, # LATIN CAPITAL LETTER U WITH MACRON - 0x016b: 0x00d7, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0x00c6, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0x00d6, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0x00a5, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0x00a3, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0x00a4, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017d: 0x00cf, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0x00d8, # LATIN SMALL LETTER Z WITH CARON - 0x2019: 0x00ef, # RIGHT SINGLE QUOTATION MARK - 0x201c: 0x00f2, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x00a6, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x00f7, # DOUBLE LOW-9 QUOTATION MARK - 0x2219: 0x00f9, # BULLET OPERATOR - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 
0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # 
HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 
0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a2: 0x0096, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x009f, # CURRENCY SIGN + 0x00a6: 0x00a7, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a9: 0x00a8, # COPYRIGHT SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 
0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0x00a0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x0083, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00b5, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00d0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x0080, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x0087, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00b6, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00d1, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0x00ed, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x0089, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00b8, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x00d3, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00b7, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00d2, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x0095, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x0085, # LATIN SMALL 
LETTER G WITH CEDILLA + 0x012a: 0x00a1, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x008c, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00bd, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00d4, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00e8, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0x00e9, # LATIN SMALL LETTER K WITH CEDILLA + 0x013b: 0x00ea, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00eb, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0x00ad, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00e7, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0x00ee, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0x00ec, # LATIN SMALL LETTER N WITH CEDILLA + 0x014c: 0x00e2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x0093, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0x008a, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0x008b, # LATIN SMALL LETTER R WITH CEDILLA + 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0x00be, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00d5, # LATIN SMALL LETTER S WITH CARON + 0x016a: 0x00c7, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00d7, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00c6, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00d6, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00a5, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00a3, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00a4, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00cf, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00d8, # LATIN SMALL LETTER Z WITH CARON + 0x2019: 0x00ef, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x00f2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00a6, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00f7, # DOUBLE LOW-9 QUOTATION MARK + 0x2219: 0x00f9, # BULLET OPERATOR + 0x2500: 0x00c4, 
# BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp850.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp850.py (original) +++ python/branches/ssize_t/Lib/encodings/cp850.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = 
codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # 
LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE 
VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH - 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN - 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2017, # DOUBLE LOW LINE - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # 
CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 
0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH + 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN + 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 
0x00f2: 0x2017, # DOUBLE LOW LINE + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' 
# 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH 
CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd7' # 0x009e -> MULTIPLICATION SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\xae' # 0x00a9 -> REGISTERED SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xa9' # 0x00b8 -> COPYRIGHT SIGN - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0x00bd -> CENT SIGN - u'\xa5' # 0x00be -> YEN SIGN - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE - u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH - u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH - u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER 
E WITH DIAERESIS - u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\u0131' # 0x00d5 -> LATIN SMALL LETTER DOTLESS I - u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\xa6' # 0x00dd -> BROKEN BAR - u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE - u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN - u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN - u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE - u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xaf' # 0x00ee -> MACRON - u'\xb4' # 0x00ef -> ACUTE ACCENT - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2017' # 0x00f2 -> DOUBLE LOW LINE - u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0x00f4 -> PILCROW SIGN - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\xb8' # 0x00f7 -> CEDILLA - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\xb9' # 0x00fb -> SUPERSCRIPT ONE - u'\xb3' # 0x00fc -> SUPERSCRIPT THREE - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 
0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING 
ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH 
CIRCUMFLEX + u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa9' # 0x00b8 -> COPYRIGHT SIGN + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0x00bd -> CENT SIGN + u'\xa5' # 0x00be -> YEN SIGN + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE + u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH + u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH + u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\u0131' # 0x00d5 -> LATIN SMALL LETTER DOTLESS I + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND 
RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\xa6' # 0x00dd -> BROKEN BAR + u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN + u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE + u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xaf' # 0x00ee -> MACRON + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2017' # 0x00f2 -> DOUBLE LOW LINE + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE 
FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN 
CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 
0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x00bd, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a5: 0x00be, # YEN SIGN - 0x00a6: 0x00dd, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00a9: 0x00b8, # COPYRIGHT SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00ae: 0x00a9, # REGISTERED SIGN - 0x00af: 0x00ee, # MACRON - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00fc, # SUPERSCRIPT THREE - 0x00b4: 0x00ef, # ACUTE ACCENT - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x00f4, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b8: 0x00f7, # CEDILLA - 0x00b9: 0x00fb, # SUPERSCRIPT ONE - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH 
CEDILLA - 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x009e, # MULTIPLICATION SIGN - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN - 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0131: 0x00d5, # LATIN SMALL LETTER DOTLESS I - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x2017: 0x00f2, # DOUBLE LOW LINE - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 
0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT 
PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET 
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00bd, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a5: 0x00be, # YEN SIGN + 0x00a6: 0x00dd, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x00b8, # COPYRIGHT SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00af: 0x00ee, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT 
TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00f7, # CEDILLA + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH 
ACUTE + 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0x00d5, # LATIN SMALL LETTER DOTLESS I 
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x2017: 0x00f2, # DOUBLE LOW LINE + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp852.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp852.py (original) +++ python/branches/ssize_t/Lib/encodings/cp852.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 
+32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 
0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # 
BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 0x00ef: 0x00b4, 
# ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00f2: 0x02db, # OGONEK - 0x00f3: 0x02c7, # CARON - 0x00f4: 0x02d8, # BREVE - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL 
LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 
0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH 
CARON + 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00f2: 0x02db, # OGONEK + 0x00f3: 0x02c7, # CARON + 0x00f4: 0x02d8, # BREVE + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END 
OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u016f' # 0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\u0107' # 0x0086 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0150' # 0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\u0151' # 0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0106' # 0x008f -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0139' # 0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u013a' # 0x0092 -> LATIN SMALL LETTER L WITH ACUTE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u013d' # 0x0095 -> LATIN CAPITAL LETTER L WITH CARON - u'\u013e' # 0x0096 -> LATIN SMALL LETTER L WITH CARON - u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u015b' # 0x0098 -> LATIN 
SMALL LETTER S WITH ACUTE - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0164' # 0x009b -> LATIN CAPITAL LETTER T WITH CARON - u'\u0165' # 0x009c -> LATIN SMALL LETTER T WITH CARON - u'\u0141' # 0x009d -> LATIN CAPITAL LETTER L WITH STROKE - u'\xd7' # 0x009e -> MULTIPLICATION SIGN - u'\u010d' # 0x009f -> LATIN SMALL LETTER C WITH CARON - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\u0104' # 0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0105' # 0x00a5 -> LATIN SMALL LETTER A WITH OGONEK - u'\u017d' # 0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON - u'\u017e' # 0x00a7 -> LATIN SMALL LETTER Z WITH CARON - u'\u0118' # 0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0119' # 0x00a9 -> LATIN SMALL LETTER E WITH OGONEK - u'\xac' # 0x00aa -> NOT SIGN - u'\u017a' # 0x00ab -> LATIN SMALL LETTER Z WITH ACUTE - u'\u010c' # 0x00ac -> LATIN CAPITAL LETTER C WITH CARON - u'\u015f' # 0x00ad -> LATIN SMALL LETTER S WITH CEDILLA - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u011a' # 0x00b7 -> LATIN CAPITAL LETTER E WITH CARON - u'\u015e' # 0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - 
u'\u017b' # 0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017c' # 0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u0102' # 0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE - u'\u0103' # 0x00c7 -> LATIN SMALL LETTER A WITH BREVE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\u0111' # 0x00d0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0110' # 0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u010e' # 0x00d2 -> LATIN CAPITAL LETTER D WITH CARON - u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u010f' # 0x00d4 -> LATIN SMALL LETTER D WITH CARON - u'\u0147' # 0x00d5 -> LATIN CAPITAL LETTER N WITH CARON - u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u011b' # 0x00d8 -> LATIN SMALL LETTER E WITH CARON - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u0162' # 0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA - u'\u016e' # 0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\u2580' # 0x00df -> UPPER HALF BLOCK - 
u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0144' # 0x00e4 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0148' # 0x00e5 -> LATIN SMALL LETTER N WITH CARON - u'\u0160' # 0x00e6 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0161' # 0x00e7 -> LATIN SMALL LETTER S WITH CARON - u'\u0154' # 0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u0155' # 0x00ea -> LATIN SMALL LETTER R WITH ACUTE - u'\u0170' # 0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE - u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0163' # 0x00ee -> LATIN SMALL LETTER T WITH CEDILLA - u'\xb4' # 0x00ef -> ACUTE ACCENT - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\u02dd' # 0x00f1 -> DOUBLE ACUTE ACCENT - u'\u02db' # 0x00f2 -> OGONEK - u'\u02c7' # 0x00f3 -> CARON - u'\u02d8' # 0x00f4 -> BREVE - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\xb8' # 0x00f7 -> CEDILLA - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\u02d9' # 0x00fa -> DOT ABOVE - u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\u0158' # 0x00fc -> LATIN CAPITAL LETTER R WITH CARON - u'\u0159' # 0x00fd -> LATIN SMALL LETTER R WITH CARON - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' 
# 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u016f' # 0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\u0107' # 0x0086 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0150' # 0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\u0151' # 0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0106' # 0x008f -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0139' # 0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u013a' # 0x0092 -> LATIN SMALL LETTER L WITH ACUTE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u013d' # 0x0095 -> LATIN CAPITAL LETTER L WITH CARON + u'\u013e' # 0x0096 -> LATIN SMALL LETTER L WITH CARON + u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u015b' # 0x0098 -> LATIN 
SMALL LETTER S WITH ACUTE + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0164' # 0x009b -> LATIN CAPITAL LETTER T WITH CARON + u'\u0165' # 0x009c -> LATIN SMALL LETTER T WITH CARON + u'\u0141' # 0x009d -> LATIN CAPITAL LETTER L WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\u010d' # 0x009f -> LATIN SMALL LETTER C WITH CARON + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\u0104' # 0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0105' # 0x00a5 -> LATIN SMALL LETTER A WITH OGONEK + u'\u017d' # 0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON + u'\u017e' # 0x00a7 -> LATIN SMALL LETTER Z WITH CARON + u'\u0118' # 0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0119' # 0x00a9 -> LATIN SMALL LETTER E WITH OGONEK + u'\xac' # 0x00aa -> NOT SIGN + u'\u017a' # 0x00ab -> LATIN SMALL LETTER Z WITH ACUTE + u'\u010c' # 0x00ac -> LATIN CAPITAL LETTER C WITH CARON + u'\u015f' # 0x00ad -> LATIN SMALL LETTER S WITH CEDILLA + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u011a' # 0x00b7 -> LATIN CAPITAL LETTER E WITH CARON + u'\u015e' # 0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + 
u'\u017b' # 0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017c' # 0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u0102' # 0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE + u'\u0103' # 0x00c7 -> LATIN SMALL LETTER A WITH BREVE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\u0111' # 0x00d0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0110' # 0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u010e' # 0x00d2 -> LATIN CAPITAL LETTER D WITH CARON + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u010f' # 0x00d4 -> LATIN SMALL LETTER D WITH CARON + u'\u0147' # 0x00d5 -> LATIN CAPITAL LETTER N WITH CARON + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u011b' # 0x00d8 -> LATIN SMALL LETTER E WITH CARON + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u0162' # 0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\u016e' # 0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + 
u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0144' # 0x00e4 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0x00e5 -> LATIN SMALL LETTER N WITH CARON + u'\u0160' # 0x00e6 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0161' # 0x00e7 -> LATIN SMALL LETTER S WITH CARON + u'\u0154' # 0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0155' # 0x00ea -> LATIN SMALL LETTER R WITH ACUTE + u'\u0170' # 0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE + u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0163' # 0x00ee -> LATIN SMALL LETTER T WITH CEDILLA + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\u02dd' # 0x00f1 -> DOUBLE ACUTE ACCENT + u'\u02db' # 0x00f2 -> OGONEK + u'\u02c7' # 0x00f3 -> CARON + u'\u02d8' # 0x00f4 -> BREVE + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\u02d9' # 0x00fa -> DOT ABOVE + u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\u0158' # 0x00fc -> LATIN CAPITAL LETTER R WITH CARON + u'\u0159' # 0x00fd -> LATIN SMALL LETTER R WITH CARON + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE 
RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 
0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # 
LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b4: 0x00ef, # ACUTE ACCENT - 0x00b8: 0x00f7, # CEDILLA - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x009e, # MULTIPLICATION SIGN - 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE - 0x0102: 0x00c6, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0x00c7, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0x00a4, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0x00a5, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0x008f, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0x0086, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0x00ac, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0x009f, # LATIN SMALL LETTER C WITH CARON - 0x010e: 0x00d2, # LATIN CAPITAL LETTER D WITH CARON - 0x010f: 0x00d4, # LATIN SMALL LETTER D WITH CARON - 0x0110: 0x00d1, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0x00d0, # LATIN SMALL LETTER D WITH STROKE - 0x0118: 0x00a8, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0x00a9, # LATIN SMALL LETTER E WITH OGONEK - 0x011a: 0x00b7, # LATIN CAPITAL LETTER E WITH CARON - 0x011b: 0x00d8, # LATIN SMALL LETTER E WITH CARON - 0x0139: 0x0091, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013a: 0x0092, # LATIN SMALL LETTER L WITH ACUTE - 0x013d: 0x0095, # LATIN CAPITAL LETTER L WITH CARON - 0x013e: 0x0096, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0x009d, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0x00e4, # LATIN SMALL LETTER N WITH ACUTE - 0x0147: 0x00d5, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0x00e5, # LATIN SMALL LETTER N WITH CARON - 0x0150: 0x008a, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0x008b, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0x00e8, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0x00ea, # LATIN SMALL LETTER R WITH ACUTE - 0x0158: 0x00fc, # LATIN 
CAPITAL LETTER R WITH CARON - 0x0159: 0x00fd, # LATIN SMALL LETTER R WITH CARON - 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE - 0x015e: 0x00b8, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0x00ad, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 0x00e6, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x00e7, # LATIN SMALL LETTER S WITH CARON - 0x0162: 0x00dd, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x0163: 0x00ee, # LATIN SMALL LETTER T WITH CEDILLA - 0x0164: 0x009b, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0x009c, # LATIN SMALL LETTER T WITH CARON - 0x016e: 0x00de, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x016f: 0x0085, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0x00eb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0x00ab, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0x00bd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0x00be, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017d: 0x00a6, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0x00a7, # LATIN SMALL LETTER Z WITH CARON - 0x02c7: 0x00f3, # CARON - 0x02d8: 0x00f4, # BREVE - 0x02d9: 0x00fa, # DOT ABOVE - 0x02db: 0x00f2, # OGONEK - 0x02dd: 0x00f1, # DOUBLE ACUTE ACCENT - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, 
# BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # 
QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 
0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b8: 0x00f7, # CEDILLA + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 
0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0x00c6, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00c7, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0x00a4, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00a5, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x008f, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x0086, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00ac, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 
0x009f, # LATIN SMALL LETTER C WITH CARON + 0x010e: 0x00d2, # LATIN CAPITAL LETTER D WITH CARON + 0x010f: 0x00d4, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0x00d1, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00d0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0x00a8, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00a9, # LATIN SMALL LETTER E WITH OGONEK + 0x011a: 0x00b7, # LATIN CAPITAL LETTER E WITH CARON + 0x011b: 0x00d8, # LATIN SMALL LETTER E WITH CARON + 0x0139: 0x0091, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013a: 0x0092, # LATIN SMALL LETTER L WITH ACUTE + 0x013d: 0x0095, # LATIN CAPITAL LETTER L WITH CARON + 0x013e: 0x0096, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0x009d, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00e4, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0x00d5, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0x00e5, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0x008a, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0x008b, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0x00e8, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0x00ea, # LATIN SMALL LETTER R WITH ACUTE + 0x0158: 0x00fc, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0x00fd, # LATIN SMALL LETTER R WITH CARON + 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE + 0x015e: 0x00b8, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00ad, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x00e6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00e7, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0x00dd, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0x00ee, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0x009b, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0x009c, # LATIN SMALL LETTER T WITH CARON + 0x016e: 0x00de, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016f: 0x0085, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 
0x00eb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00ab, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00bd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00be, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00a6, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00a7, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0x00f3, # CARON + 0x02d8: 0x00f4, # BREVE + 0x02d9: 0x00fa, # DOT ABOVE + 0x02db: 0x00f2, # OGONEK + 0x02dd: 0x00f1, # DOUBLE ACUTE ACCENT + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK 
SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp855.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp855.py (original) +++ python/branches/ssize_t/Lib/encodings/cp855.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO - 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI - 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE - 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x009c: 0x044e, # CYRILLIC 
SMALL LETTER YU - 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I - 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS 
LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O - 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00ef: 0x2116, # 
NUMERO SIGN - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E - 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00fd: 0x00a7, # SECTION SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO + 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI + 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE + 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 
0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU + 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I + 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, 
# BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O + 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE + 
0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00ef: 0x2116, # NUMERO SIGN + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E + 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00fd: 0x00a7, # SECTION SIGN + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - 
u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN 
CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0452' # 0x0080 -> CYRILLIC SMALL LETTER DJE - u'\u0402' # 0x0081 -> CYRILLIC CAPITAL LETTER DJE - u'\u0453' # 0x0082 -> CYRILLIC SMALL LETTER GJE - u'\u0403' # 0x0083 -> CYRILLIC CAPITAL LETTER GJE - u'\u0451' # 0x0084 -> CYRILLIC SMALL LETTER IO - u'\u0401' # 0x0085 -> CYRILLIC CAPITAL LETTER IO - u'\u0454' # 0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0404' # 0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0455' # 
0x0088 -> CYRILLIC SMALL LETTER DZE - u'\u0405' # 0x0089 -> CYRILLIC CAPITAL LETTER DZE - u'\u0456' # 0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0406' # 0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0457' # 0x008c -> CYRILLIC SMALL LETTER YI - u'\u0407' # 0x008d -> CYRILLIC CAPITAL LETTER YI - u'\u0458' # 0x008e -> CYRILLIC SMALL LETTER JE - u'\u0408' # 0x008f -> CYRILLIC CAPITAL LETTER JE - u'\u0459' # 0x0090 -> CYRILLIC SMALL LETTER LJE - u'\u0409' # 0x0091 -> CYRILLIC CAPITAL LETTER LJE - u'\u045a' # 0x0092 -> CYRILLIC SMALL LETTER NJE - u'\u040a' # 0x0093 -> CYRILLIC CAPITAL LETTER NJE - u'\u045b' # 0x0094 -> CYRILLIC SMALL LETTER TSHE - u'\u040b' # 0x0095 -> CYRILLIC CAPITAL LETTER TSHE - u'\u045c' # 0x0096 -> CYRILLIC SMALL LETTER KJE - u'\u040c' # 0x0097 -> CYRILLIC CAPITAL LETTER KJE - u'\u045e' # 0x0098 -> CYRILLIC SMALL LETTER SHORT U - u'\u040e' # 0x0099 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045f' # 0x009a -> CYRILLIC SMALL LETTER DZHE - u'\u040f' # 0x009b -> CYRILLIC CAPITAL LETTER DZHE - u'\u044e' # 0x009c -> CYRILLIC SMALL LETTER YU - u'\u042e' # 0x009d -> CYRILLIC CAPITAL LETTER YU - u'\u044a' # 0x009e -> CYRILLIC SMALL LETTER HARD SIGN - u'\u042a' # 0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A - u'\u0410' # 0x00a1 -> CYRILLIC CAPITAL LETTER A - u'\u0431' # 0x00a2 -> CYRILLIC SMALL LETTER BE - u'\u0411' # 0x00a3 -> CYRILLIC CAPITAL LETTER BE - u'\u0446' # 0x00a4 -> CYRILLIC SMALL LETTER TSE - u'\u0426' # 0x00a5 -> CYRILLIC CAPITAL LETTER TSE - u'\u0434' # 0x00a6 -> CYRILLIC SMALL LETTER DE - u'\u0414' # 0x00a7 -> CYRILLIC CAPITAL LETTER DE - u'\u0435' # 0x00a8 -> CYRILLIC SMALL LETTER IE - u'\u0415' # 0x00a9 -> CYRILLIC CAPITAL LETTER IE - u'\u0444' # 0x00aa -> CYRILLIC SMALL LETTER EF - u'\u0424' # 0x00ab -> CYRILLIC CAPITAL LETTER EF - u'\u0433' # 0x00ac -> CYRILLIC SMALL LETTER GHE - u'\u0413' # 0x00ad -> CYRILLIC CAPITAL LETTER GHE - u'\xab' # 0x00ae -> 
LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u0445' # 0x00b5 -> CYRILLIC SMALL LETTER HA - u'\u0425' # 0x00b6 -> CYRILLIC CAPITAL LETTER HA - u'\u0438' # 0x00b7 -> CYRILLIC SMALL LETTER I - u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u0439' # 0x00bd -> CYRILLIC SMALL LETTER SHORT I - u'\u0419' # 0x00be -> CYRILLIC CAPITAL LETTER SHORT I - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u043a' # 0x00c6 -> CYRILLIC SMALL LETTER KA - u'\u041a' # 0x00c7 -> CYRILLIC CAPITAL LETTER KA - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\u043b' # 0x00d0 -> CYRILLIC SMALL LETTER EL - u'\u041b' # 0x00d1 -> CYRILLIC CAPITAL LETTER EL - u'\u043c' # 0x00d2 -> CYRILLIC SMALL LETTER EM - 
u'\u041c' # 0x00d3 -> CYRILLIC CAPITAL LETTER EM - u'\u043d' # 0x00d4 -> CYRILLIC SMALL LETTER EN - u'\u041d' # 0x00d5 -> CYRILLIC CAPITAL LETTER EN - u'\u043e' # 0x00d6 -> CYRILLIC SMALL LETTER O - u'\u041e' # 0x00d7 -> CYRILLIC CAPITAL LETTER O - u'\u043f' # 0x00d8 -> CYRILLIC SMALL LETTER PE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u041f' # 0x00dd -> CYRILLIC CAPITAL LETTER PE - u'\u044f' # 0x00de -> CYRILLIC SMALL LETTER YA - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u042f' # 0x00e0 -> CYRILLIC CAPITAL LETTER YA - u'\u0440' # 0x00e1 -> CYRILLIC SMALL LETTER ER - u'\u0420' # 0x00e2 -> CYRILLIC CAPITAL LETTER ER - u'\u0441' # 0x00e3 -> CYRILLIC SMALL LETTER ES - u'\u0421' # 0x00e4 -> CYRILLIC CAPITAL LETTER ES - u'\u0442' # 0x00e5 -> CYRILLIC SMALL LETTER TE - u'\u0422' # 0x00e6 -> CYRILLIC CAPITAL LETTER TE - u'\u0443' # 0x00e7 -> CYRILLIC SMALL LETTER U - u'\u0423' # 0x00e8 -> CYRILLIC CAPITAL LETTER U - u'\u0436' # 0x00e9 -> CYRILLIC SMALL LETTER ZHE - u'\u0416' # 0x00ea -> CYRILLIC CAPITAL LETTER ZHE - u'\u0432' # 0x00eb -> CYRILLIC SMALL LETTER VE - u'\u0412' # 0x00ec -> CYRILLIC CAPITAL LETTER VE - u'\u044c' # 0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u042c' # 0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u2116' # 0x00ef -> NUMERO SIGN - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\u044b' # 0x00f1 -> CYRILLIC SMALL LETTER YERU - u'\u042b' # 0x00f2 -> CYRILLIC CAPITAL LETTER YERU - u'\u0437' # 0x00f3 -> CYRILLIC SMALL LETTER ZE - u'\u0417' # 0x00f4 -> CYRILLIC CAPITAL LETTER ZE - u'\u0448' # 0x00f5 -> CYRILLIC SMALL LETTER SHA - u'\u0428' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHA - u'\u044d' # 0x00f7 -> CYRILLIC SMALL LETTER E - u'\u042d' # 0x00f8 -> CYRILLIC CAPITAL LETTER E - u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA - u'\u0429' # 0x00fa -> CYRILLIC CAPITAL LETTER SHCHA - u'\u0447' # 0x00fb -> 
CYRILLIC SMALL LETTER CHE - u'\u0427' # 0x00fc -> CYRILLIC CAPITAL LETTER CHE - u'\xa7' # 0x00fd -> SECTION SIGN - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0452' # 0x0080 -> CYRILLIC SMALL LETTER DJE + u'\u0402' # 0x0081 -> CYRILLIC CAPITAL LETTER DJE + u'\u0453' # 0x0082 -> CYRILLIC SMALL LETTER GJE + u'\u0403' # 0x0083 -> CYRILLIC CAPITAL LETTER GJE + u'\u0451' # 0x0084 -> CYRILLIC SMALL LETTER IO + u'\u0401' # 0x0085 -> CYRILLIC CAPITAL LETTER IO + u'\u0454' # 0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0404' # 0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0455' # 0x0088 -> CYRILLIC SMALL LETTER DZE + u'\u0405' # 0x0089 -> CYRILLIC CAPITAL LETTER DZE + u'\u0456' # 0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0406' # 0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0x008c -> CYRILLIC SMALL LETTER YI + u'\u0407' # 0x008d -> CYRILLIC CAPITAL LETTER YI + u'\u0458' # 0x008e -> CYRILLIC SMALL LETTER JE + u'\u0408' # 0x008f -> CYRILLIC CAPITAL LETTER JE + u'\u0459' # 0x0090 -> CYRILLIC SMALL LETTER 
LJE + u'\u0409' # 0x0091 -> CYRILLIC CAPITAL LETTER LJE + u'\u045a' # 0x0092 -> CYRILLIC SMALL LETTER NJE + u'\u040a' # 0x0093 -> CYRILLIC CAPITAL LETTER NJE + u'\u045b' # 0x0094 -> CYRILLIC SMALL LETTER TSHE + u'\u040b' # 0x0095 -> CYRILLIC CAPITAL LETTER TSHE + u'\u045c' # 0x0096 -> CYRILLIC SMALL LETTER KJE + u'\u040c' # 0x0097 -> CYRILLIC CAPITAL LETTER KJE + u'\u045e' # 0x0098 -> CYRILLIC SMALL LETTER SHORT U + u'\u040e' # 0x0099 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045f' # 0x009a -> CYRILLIC SMALL LETTER DZHE + u'\u040f' # 0x009b -> CYRILLIC CAPITAL LETTER DZHE + u'\u044e' # 0x009c -> CYRILLIC SMALL LETTER YU + u'\u042e' # 0x009d -> CYRILLIC CAPITAL LETTER YU + u'\u044a' # 0x009e -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042a' # 0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A + u'\u0410' # 0x00a1 -> CYRILLIC CAPITAL LETTER A + u'\u0431' # 0x00a2 -> CYRILLIC SMALL LETTER BE + u'\u0411' # 0x00a3 -> CYRILLIC CAPITAL LETTER BE + u'\u0446' # 0x00a4 -> CYRILLIC SMALL LETTER TSE + u'\u0426' # 0x00a5 -> CYRILLIC CAPITAL LETTER TSE + u'\u0434' # 0x00a6 -> CYRILLIC SMALL LETTER DE + u'\u0414' # 0x00a7 -> CYRILLIC CAPITAL LETTER DE + u'\u0435' # 0x00a8 -> CYRILLIC SMALL LETTER IE + u'\u0415' # 0x00a9 -> CYRILLIC CAPITAL LETTER IE + u'\u0444' # 0x00aa -> CYRILLIC SMALL LETTER EF + u'\u0424' # 0x00ab -> CYRILLIC CAPITAL LETTER EF + u'\u0433' # 0x00ac -> CYRILLIC SMALL LETTER GHE + u'\u0413' # 0x00ad -> CYRILLIC CAPITAL LETTER GHE + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u0445' # 0x00b5 -> CYRILLIC SMALL LETTER HA + u'\u0425' # 0x00b6 -> CYRILLIC CAPITAL LETTER HA + u'\u0438' # 0x00b7 -> CYRILLIC SMALL LETTER I + 
u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u0439' # 0x00bd -> CYRILLIC SMALL LETTER SHORT I + u'\u0419' # 0x00be -> CYRILLIC CAPITAL LETTER SHORT I + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u043a' # 0x00c6 -> CYRILLIC SMALL LETTER KA + u'\u041a' # 0x00c7 -> CYRILLIC CAPITAL LETTER KA + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\u043b' # 0x00d0 -> CYRILLIC SMALL LETTER EL + u'\u041b' # 0x00d1 -> CYRILLIC CAPITAL LETTER EL + u'\u043c' # 0x00d2 -> CYRILLIC SMALL LETTER EM + u'\u041c' # 0x00d3 -> CYRILLIC CAPITAL LETTER EM + u'\u043d' # 0x00d4 -> CYRILLIC SMALL LETTER EN + u'\u041d' # 0x00d5 -> CYRILLIC CAPITAL LETTER EN + u'\u043e' # 0x00d6 -> CYRILLIC SMALL LETTER O + u'\u041e' # 0x00d7 -> CYRILLIC CAPITAL LETTER O + u'\u043f' # 0x00d8 -> CYRILLIC SMALL LETTER PE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER 
HALF BLOCK + u'\u041f' # 0x00dd -> CYRILLIC CAPITAL LETTER PE + u'\u044f' # 0x00de -> CYRILLIC SMALL LETTER YA + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u042f' # 0x00e0 -> CYRILLIC CAPITAL LETTER YA + u'\u0440' # 0x00e1 -> CYRILLIC SMALL LETTER ER + u'\u0420' # 0x00e2 -> CYRILLIC CAPITAL LETTER ER + u'\u0441' # 0x00e3 -> CYRILLIC SMALL LETTER ES + u'\u0421' # 0x00e4 -> CYRILLIC CAPITAL LETTER ES + u'\u0442' # 0x00e5 -> CYRILLIC SMALL LETTER TE + u'\u0422' # 0x00e6 -> CYRILLIC CAPITAL LETTER TE + u'\u0443' # 0x00e7 -> CYRILLIC SMALL LETTER U + u'\u0423' # 0x00e8 -> CYRILLIC CAPITAL LETTER U + u'\u0436' # 0x00e9 -> CYRILLIC SMALL LETTER ZHE + u'\u0416' # 0x00ea -> CYRILLIC CAPITAL LETTER ZHE + u'\u0432' # 0x00eb -> CYRILLIC SMALL LETTER VE + u'\u0412' # 0x00ec -> CYRILLIC CAPITAL LETTER VE + u'\u044c' # 0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u042c' # 0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u2116' # 0x00ef -> NUMERO SIGN + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\u044b' # 0x00f1 -> CYRILLIC SMALL LETTER YERU + u'\u042b' # 0x00f2 -> CYRILLIC CAPITAL LETTER YERU + u'\u0437' # 0x00f3 -> CYRILLIC SMALL LETTER ZE + u'\u0417' # 0x00f4 -> CYRILLIC CAPITAL LETTER ZE + u'\u0448' # 0x00f5 -> CYRILLIC SMALL LETTER SHA + u'\u0428' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHA + u'\u044d' # 0x00f7 -> CYRILLIC SMALL LETTER E + u'\u042d' # 0x00f8 -> CYRILLIC CAPITAL LETTER E + u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA + u'\u0429' # 0x00fa -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0447' # 0x00fb -> CYRILLIC SMALL LETTER CHE + u'\u0427' # 0x00fc -> CYRILLIC CAPITAL LETTER CHE + u'\xa7' # 0x00fd -> SECTION SIGN + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 
0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # 
LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL 
LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a7: 0x00fd, # SECTION SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x0401: 0x0085, # CYRILLIC CAPITAL LETTER IO - 0x0402: 0x0081, # CYRILLIC CAPITAL LETTER DJE - 0x0403: 0x0083, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0x0087, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0405: 0x0089, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0x008b, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0x008d, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0x008f, # CYRILLIC CAPITAL LETTER JE - 0x0409: 0x0091, # CYRILLIC CAPITAL LETTER LJE - 0x040a: 0x0093, # CYRILLIC CAPITAL LETTER NJE - 0x040b: 0x0095, # CYRILLIC CAPITAL LETTER TSHE - 0x040c: 0x0097, # CYRILLIC CAPITAL LETTER KJE - 0x040e: 0x0099, # CYRILLIC CAPITAL LETTER SHORT U - 0x040f: 0x009b, # CYRILLIC CAPITAL LETTER DZHE - 0x0410: 0x00a1, # CYRILLIC CAPITAL LETTER A - 0x0411: 0x00a3, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0x00ec, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0x00ad, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0x00a7, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0x00a9, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0x00ea, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0x00f4, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I - 0x0419: 0x00be, # CYRILLIC CAPITAL LETTER SHORT I - 0x041a: 0x00c7, # CYRILLIC CAPITAL LETTER KA - 0x041b: 0x00d1, # CYRILLIC CAPITAL LETTER EL - 
0x041c: 0x00d3, # CYRILLIC CAPITAL LETTER EM - 0x041d: 0x00d5, # CYRILLIC CAPITAL LETTER EN - 0x041e: 0x00d7, # CYRILLIC CAPITAL LETTER O - 0x041f: 0x00dd, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0x00e2, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0x00e4, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0x00e6, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0x00e8, # CYRILLIC CAPITAL LETTER U - 0x0424: 0x00ab, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0x00b6, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0x00a5, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0x00fc, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0x00f6, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0x00fa, # CYRILLIC CAPITAL LETTER SHCHA - 0x042a: 0x009f, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042b: 0x00f2, # CYRILLIC CAPITAL LETTER YERU - 0x042c: 0x00ee, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042d: 0x00f8, # CYRILLIC CAPITAL LETTER E - 0x042e: 0x009d, # CYRILLIC CAPITAL LETTER YU - 0x042f: 0x00e0, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A - 0x0431: 0x00a2, # CYRILLIC SMALL LETTER BE - 0x0432: 0x00eb, # CYRILLIC SMALL LETTER VE - 0x0433: 0x00ac, # CYRILLIC SMALL LETTER GHE - 0x0434: 0x00a6, # CYRILLIC SMALL LETTER DE - 0x0435: 0x00a8, # CYRILLIC SMALL LETTER IE - 0x0436: 0x00e9, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0x00f3, # CYRILLIC SMALL LETTER ZE - 0x0438: 0x00b7, # CYRILLIC SMALL LETTER I - 0x0439: 0x00bd, # CYRILLIC SMALL LETTER SHORT I - 0x043a: 0x00c6, # CYRILLIC SMALL LETTER KA - 0x043b: 0x00d0, # CYRILLIC SMALL LETTER EL - 0x043c: 0x00d2, # CYRILLIC SMALL LETTER EM - 0x043d: 0x00d4, # CYRILLIC SMALL LETTER EN - 0x043e: 0x00d6, # CYRILLIC SMALL LETTER O - 0x043f: 0x00d8, # CYRILLIC SMALL LETTER PE - 0x0440: 0x00e1, # CYRILLIC SMALL LETTER ER - 0x0441: 0x00e3, # CYRILLIC SMALL LETTER ES - 0x0442: 0x00e5, # CYRILLIC SMALL LETTER TE - 0x0443: 0x00e7, # CYRILLIC SMALL LETTER U - 0x0444: 0x00aa, # CYRILLIC SMALL LETTER EF - 0x0445: 0x00b5, # CYRILLIC SMALL LETTER HA - 0x0446: 0x00a4, # CYRILLIC SMALL LETTER TSE - 
0x0447: 0x00fb, # CYRILLIC SMALL LETTER CHE - 0x0448: 0x00f5, # CYRILLIC SMALL LETTER SHA - 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA - 0x044a: 0x009e, # CYRILLIC SMALL LETTER HARD SIGN - 0x044b: 0x00f1, # CYRILLIC SMALL LETTER YERU - 0x044c: 0x00ed, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044d: 0x00f7, # CYRILLIC SMALL LETTER E - 0x044e: 0x009c, # CYRILLIC SMALL LETTER YU - 0x044f: 0x00de, # CYRILLIC SMALL LETTER YA - 0x0451: 0x0084, # CYRILLIC SMALL LETTER IO - 0x0452: 0x0080, # CYRILLIC SMALL LETTER DJE - 0x0453: 0x0082, # CYRILLIC SMALL LETTER GJE - 0x0454: 0x0086, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0x0088, # CYRILLIC SMALL LETTER DZE - 0x0456: 0x008a, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0x008c, # CYRILLIC SMALL LETTER YI - 0x0458: 0x008e, # CYRILLIC SMALL LETTER JE - 0x0459: 0x0090, # CYRILLIC SMALL LETTER LJE - 0x045a: 0x0092, # CYRILLIC SMALL LETTER NJE - 0x045b: 0x0094, # CYRILLIC SMALL LETTER TSHE - 0x045c: 0x0096, # CYRILLIC SMALL LETTER KJE - 0x045e: 0x0098, # CYRILLIC SMALL LETTER SHORT U - 0x045f: 0x009a, # CYRILLIC SMALL LETTER DZHE - 0x2116: 0x00ef, # NUMERO SIGN - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT 
PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET 
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a7: 0x00fd, # SECTION SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x0401: 0x0085, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0x0081, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0x0083, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0x0087, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0x0089, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0x008b, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0x008d, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0x008f, # CYRILLIC CAPITAL LETTER 
JE + 0x0409: 0x0091, # CYRILLIC CAPITAL LETTER LJE + 0x040a: 0x0093, # CYRILLIC CAPITAL LETTER NJE + 0x040b: 0x0095, # CYRILLIC CAPITAL LETTER TSHE + 0x040c: 0x0097, # CYRILLIC CAPITAL LETTER KJE + 0x040e: 0x0099, # CYRILLIC CAPITAL LETTER SHORT U + 0x040f: 0x009b, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0x00a1, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x00a3, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x00ec, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x00ad, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x00a7, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x00a9, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x00ea, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x00f4, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x00be, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x00c7, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x00d1, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x00d3, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x00d5, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x00d7, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x00dd, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x00e2, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x00e4, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x00e6, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x00e8, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x00ab, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x00b6, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x00a5, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x00fc, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x00f6, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x00fa, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x009f, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x00f2, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x00ee, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x00f8, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x009d, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x00e0, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00a2, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00eb, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00ac, # CYRILLIC SMALL 
LETTER GHE + 0x0434: 0x00a6, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00a8, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00e9, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00f3, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00b7, # CYRILLIC SMALL LETTER I + 0x0439: 0x00bd, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00c6, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00d0, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00d2, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00d4, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00d6, # CYRILLIC SMALL LETTER O + 0x043f: 0x00d8, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00e1, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00e3, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00e5, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00e7, # CYRILLIC SMALL LETTER U + 0x0444: 0x00aa, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00b5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00a4, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00fb, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00f5, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x009e, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00f1, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00ed, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00f7, # CYRILLIC SMALL LETTER E + 0x044e: 0x009c, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00de, # CYRILLIC SMALL LETTER YA + 0x0451: 0x0084, # CYRILLIC SMALL LETTER IO + 0x0452: 0x0080, # CYRILLIC SMALL LETTER DJE + 0x0453: 0x0082, # CYRILLIC SMALL LETTER GJE + 0x0454: 0x0086, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0x0088, # CYRILLIC SMALL LETTER DZE + 0x0456: 0x008a, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0x008c, # CYRILLIC SMALL LETTER YI + 0x0458: 0x008e, # CYRILLIC SMALL LETTER JE + 0x0459: 0x0090, # CYRILLIC SMALL LETTER LJE + 0x045a: 0x0092, # CYRILLIC SMALL LETTER NJE + 0x045b: 0x0094, # CYRILLIC SMALL LETTER TSHE + 0x045c: 0x0096, # CYRILLIC SMALL LETTER KJE + 0x045e: 0x0098, # CYRILLIC SMALL LETTER SHORT U + 0x045f: 0x009a, # CYRILLIC SMALL LETTER DZHE + 0x2116: 0x00ef, # NUMERO 
SIGN + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp856.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp856.py (original) +++ python/branches/ssize_t/Lib/encodings/cp856.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,481 +32,480 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 
0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u05d0' # 0x80 -> HEBREW LETTER ALEF - u'\u05d1' # 0x81 -> HEBREW LETTER BET - u'\u05d2' # 0x82 -> HEBREW LETTER GIMEL - u'\u05d3' # 0x83 -> HEBREW LETTER DALET - u'\u05d4' # 0x84 -> HEBREW LETTER HE - u'\u05d5' # 0x85 -> HEBREW LETTER VAV - u'\u05d6' # 0x86 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0x87 -> HEBREW LETTER HET - u'\u05d8' # 0x88 -> HEBREW LETTER TET - u'\u05d9' # 0x89 -> HEBREW LETTER YOD - u'\u05da' # 0x8A -> HEBREW LETTER FINAL KAF - u'\u05db' # 0x8B -> HEBREW LETTER KAF - u'\u05dc' # 0x8C -> HEBREW LETTER LAMED - u'\u05dd' # 0x8D -> HEBREW LETTER FINAL MEM - u'\u05de' # 0x8E -> HEBREW LETTER MEM - u'\u05df' # 0x8F -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0x90 -> HEBREW LETTER NUN - u'\u05e1' # 0x91 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0x92 -> HEBREW LETTER AYIN - u'\u05e3' # 0x93 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0x94 -> HEBREW LETTER PE - u'\u05e5' # 0x95 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0x96 -> HEBREW LETTER TSADI - u'\u05e7' # 0x97 -> HEBREW LETTER QOF - u'\u05e8' # 0x98 -> HEBREW LETTER RESH - u'\u05e9' # 0x99 -> HEBREW LETTER SHIN - u'\u05ea' # 0x9A -> HEBREW LETTER TAV - u'\ufffe' # 0x9B -> UNDEFINED - u'\xa3' # 0x9C -> POUND SIGN - u'\ufffe' # 0x9D -> UNDEFINED - u'\xd7' # 0x9E -> MULTIPLICATION SIGN - u'\ufffe' # 0x9F -> UNDEFINED - u'\ufffe' # 0xA0 -> UNDEFINED - u'\ufffe' # 0xA1 -> UNDEFINED - u'\ufffe' # 0xA2 -> UNDEFINED - u'\ufffe' # 0xA3 -> UNDEFINED - u'\ufffe' # 0xA4 -> UNDEFINED - u'\ufffe' # 0xA5 -> UNDEFINED - u'\ufffe' # 0xA6 -> UNDEFINED - u'\ufffe' # 0xA7 -> UNDEFINED - 
u'\ufffe' # 0xA8 -> UNDEFINED - u'\xae' # 0xA9 -> REGISTERED SIGN - u'\xac' # 0xAA -> NOT SIGN - u'\xbd' # 0xAB -> VULGAR FRACTION ONE HALF - u'\xbc' # 0xAC -> VULGAR FRACTION ONE QUARTER - u'\ufffe' # 0xAD -> UNDEFINED - u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0xB0 -> LIGHT SHADE - u'\u2592' # 0xB1 -> MEDIUM SHADE - u'\u2593' # 0xB2 -> DARK SHADE - u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\ufffe' # 0xB5 -> UNDEFINED - u'\ufffe' # 0xB6 -> UNDEFINED - u'\ufffe' # 0xB7 -> UNDEFINED - u'\xa9' # 0xB8 -> COPYRIGHT SIGN - u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0xBD -> CENT SIGN - u'\xa5' # 0xBE -> YEN SIGN - u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\ufffe' # 0xC6 -> UNDEFINED - u'\ufffe' # 0xC7 -> UNDEFINED - u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0xCF -> CURRENCY SIGN - u'\ufffe' # 0xD0 -> UNDEFINED - u'\ufffe' # 0xD1 -> UNDEFINED - u'\ufffe' # 0xD2 -> UNDEFINED - u'\ufffe' # 0xD3 
-> UNDEFINEDS - u'\ufffe' # 0xD4 -> UNDEFINED - u'\ufffe' # 0xD5 -> UNDEFINED - u'\ufffe' # 0xD6 -> UNDEFINEDE - u'\ufffe' # 0xD7 -> UNDEFINED - u'\ufffe' # 0xD8 -> UNDEFINED - u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0xDB -> FULL BLOCK - u'\u2584' # 0xDC -> LOWER HALF BLOCK - u'\xa6' # 0xDD -> BROKEN BAR - u'\ufffe' # 0xDE -> UNDEFINED - u'\u2580' # 0xDF -> UPPER HALF BLOCK - u'\ufffe' # 0xE0 -> UNDEFINED - u'\ufffe' # 0xE1 -> UNDEFINED - u'\ufffe' # 0xE2 -> UNDEFINED - u'\ufffe' # 0xE3 -> UNDEFINED - u'\ufffe' # 0xE4 -> UNDEFINED - u'\ufffe' # 0xE5 -> UNDEFINED - u'\xb5' # 0xE6 -> MICRO SIGN - u'\ufffe' # 0xE7 -> UNDEFINED - u'\ufffe' # 0xE8 -> UNDEFINED - u'\ufffe' # 0xE9 -> UNDEFINED - u'\ufffe' # 0xEA -> UNDEFINED - u'\ufffe' # 0xEB -> UNDEFINED - u'\ufffe' # 0xEC -> UNDEFINED - u'\ufffe' # 0xED -> UNDEFINED - u'\xaf' # 0xEE -> MACRON - u'\xb4' # 0xEF -> ACUTE ACCENT - u'\xad' # 0xF0 -> SOFT HYPHEN - u'\xb1' # 0xF1 -> PLUS-MINUS SIGN - u'\u2017' # 0xF2 -> DOUBLE LOW LINE - u'\xbe' # 0xF3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0xF4 -> PILCROW SIGN - u'\xa7' # 0xF5 -> SECTION SIGN - u'\xf7' # 0xF6 -> DIVISION SIGN - u'\xb8' # 0xF7 -> CEDILLA - u'\xb0' # 0xF8 -> DEGREE SIGN - u'\xa8' # 0xF9 -> DIAERESIS - u'\xb7' # 0xFA -> MIDDLE DOT - u'\xb9' # 0xFB -> SUPERSCRIPT ONE - u'\xb3' # 0xFC -> SUPERSCRIPT THREE - u'\xb2' # 0xFD -> SUPERSCRIPT TWO - u'\u25a0' # 0xFE -> BLACK SQUARE - u'\xa0' # 0xFF -> NO-BREAK SPACE + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> 
SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u05d0' # 0x80 -> HEBREW LETTER ALEF + u'\u05d1' # 0x81 -> HEBREW LETTER BET + u'\u05d2' # 0x82 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x83 -> HEBREW LETTER DALET + u'\u05d4' # 0x84 -> HEBREW LETTER HE + u'\u05d5' # 0x85 -> HEBREW LETTER VAV + u'\u05d6' # 0x86 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x87 -> HEBREW LETTER HET + u'\u05d8' # 0x88 -> HEBREW LETTER TET + u'\u05d9' # 0x89 -> HEBREW LETTER YOD + u'\u05da' # 0x8A -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x8B -> HEBREW LETTER KAF + u'\u05dc' # 0x8C -> HEBREW LETTER LAMED + u'\u05dd' # 0x8D -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x8E -> HEBREW LETTER MEM + u'\u05df' # 0x8F -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x90 -> HEBREW LETTER NUN + u'\u05e1' # 0x91 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x92 -> HEBREW LETTER AYIN + u'\u05e3' # 0x93 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x94 -> HEBREW LETTER PE + u'\u05e5' # 0x95 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x96 -> HEBREW LETTER TSADI + u'\u05e7' # 0x97 -> HEBREW LETTER QOF + u'\u05e8' # 0x98 -> HEBREW LETTER RESH + u'\u05e9' # 0x99 -> HEBREW LETTER SHIN + u'\u05ea' # 0x9A -> HEBREW LETTER TAV + u'\ufffe' # 0x9B -> UNDEFINED + u'\xa3' # 0x9C -> POUND SIGN + u'\ufffe' # 0x9D -> UNDEFINED + u'\xd7' # 0x9E -> MULTIPLICATION SIGN + u'\ufffe' # 0x9F -> UNDEFINED + u'\ufffe' # 0xA0 -> UNDEFINED + u'\ufffe' # 0xA1 -> UNDEFINED + u'\ufffe' # 0xA2 -> UNDEFINED + u'\ufffe' # 0xA3 -> UNDEFINED + u'\ufffe' # 0xA4 -> UNDEFINED + u'\ufffe' # 0xA5 -> UNDEFINED + u'\ufffe' # 0xA6 -> UNDEFINED + u'\ufffe' # 0xA7 -> UNDEFINED + 
u'\ufffe' # 0xA8 -> UNDEFINED + u'\xae' # 0xA9 -> REGISTERED SIGN + u'\xac' # 0xAA -> NOT SIGN + u'\xbd' # 0xAB -> VULGAR FRACTION ONE HALF + u'\xbc' # 0xAC -> VULGAR FRACTION ONE QUARTER + u'\ufffe' # 0xAD -> UNDEFINED + u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0xB0 -> LIGHT SHADE + u'\u2592' # 0xB1 -> MEDIUM SHADE + u'\u2593' # 0xB2 -> DARK SHADE + u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\ufffe' # 0xB5 -> UNDEFINED + u'\ufffe' # 0xB6 -> UNDEFINED + u'\ufffe' # 0xB7 -> UNDEFINED + u'\xa9' # 0xB8 -> COPYRIGHT SIGN + u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0xBD -> CENT SIGN + u'\xa5' # 0xBE -> YEN SIGN + u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\ufffe' # 0xC6 -> UNDEFINED + u'\ufffe' # 0xC7 -> UNDEFINED + u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0xCF -> CURRENCY SIGN + u'\ufffe' # 0xD0 -> UNDEFINED + u'\ufffe' # 0xD1 -> UNDEFINED + u'\ufffe' # 0xD2 -> UNDEFINED + u'\ufffe' # 0xD3 
-> UNDEFINEDS + u'\ufffe' # 0xD4 -> UNDEFINED + u'\ufffe' # 0xD5 -> UNDEFINED + u'\ufffe' # 0xD6 -> UNDEFINEDE + u'\ufffe' # 0xD7 -> UNDEFINED + u'\ufffe' # 0xD8 -> UNDEFINED + u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0xDB -> FULL BLOCK + u'\u2584' # 0xDC -> LOWER HALF BLOCK + u'\xa6' # 0xDD -> BROKEN BAR + u'\ufffe' # 0xDE -> UNDEFINED + u'\u2580' # 0xDF -> UPPER HALF BLOCK + u'\ufffe' # 0xE0 -> UNDEFINED + u'\ufffe' # 0xE1 -> UNDEFINED + u'\ufffe' # 0xE2 -> UNDEFINED + u'\ufffe' # 0xE3 -> UNDEFINED + u'\ufffe' # 0xE4 -> UNDEFINED + u'\ufffe' # 0xE5 -> UNDEFINED + u'\xb5' # 0xE6 -> MICRO SIGN + u'\ufffe' # 0xE7 -> UNDEFINED + u'\ufffe' # 0xE8 -> UNDEFINED + u'\ufffe' # 0xE9 -> UNDEFINED + u'\ufffe' # 0xEA -> UNDEFINED + u'\ufffe' # 0xEB -> UNDEFINED + u'\ufffe' # 0xEC -> UNDEFINED + u'\ufffe' # 0xED -> UNDEFINED + u'\xaf' # 0xEE -> MACRON + u'\xb4' # 0xEF -> ACUTE ACCENT + u'\xad' # 0xF0 -> SOFT HYPHEN + u'\xb1' # 0xF1 -> PLUS-MINUS SIGN + u'\u2017' # 0xF2 -> DOUBLE LOW LINE + u'\xbe' # 0xF3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0xF4 -> PILCROW SIGN + u'\xa7' # 0xF5 -> SECTION SIGN + u'\xf7' # 0xF6 -> DIVISION SIGN + u'\xb8' # 0xF7 -> CEDILLA + u'\xb0' # 0xF8 -> DEGREE SIGN + u'\xa8' # 0xF9 -> DIAERESIS + u'\xb7' # 0xFA -> MIDDLE DOT + u'\xb9' # 0xFB -> SUPERSCRIPT ONE + u'\xb3' # 0xFC -> SUPERSCRIPT THREE + u'\xb2' # 0xFD -> SUPERSCRIPT TWO + u'\u25a0' # 0xFE -> BLACK SQUARE + u'\xa0' # 0xFF -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 
0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 
0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # 
TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xFF, # NO-BREAK SPACE - 0x00A2: 0xBD, # CENT SIGN - 0x00A3: 0x9C, # POUND SIGN - 0x00A4: 0xCF, # CURRENCY SIGN - 0x00A5: 0xBE, # YEN SIGN - 0x00A6: 0xDD, # BROKEN BAR - 0x00A7: 0xF5, # SECTION SIGN - 0x00A8: 0xF9, # DIAERESIS - 0x00A9: 0xB8, # COPYRIGHT SIGN - 0x00AB: 0xAE, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAA, # NOT SIGN - 0x00AD: 0xF0, # SOFT HYPHEN - 0x00AE: 0xA9, # REGISTERED SIGN - 0x00AF: 0xEE, # MACRON - 0x00B0: 0xF8, # DEGREE SIGN - 0x00B1: 0xF1, # PLUS-MINUS SIGN - 0x00B2: 0xFD, # SUPERSCRIPT TWO - 0x00B3: 0xFC, # SUPERSCRIPT THREE - 0x00B4: 0xEF, # ACUTE ACCENT - 0x00B5: 0xE6, # MICRO SIGN - 0x00B6: 0xF4, # PILCROW SIGN - 0x00B7: 0xFA, # MIDDLE DOT - 0x00B8: 0xF7, # CEDILLA - 0x00B9: 0xFB, # SUPERSCRIPT ONE - 0x00BB: 0xAF, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xAC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xAB, # VULGAR FRACTION ONE HALF - 0x00BE: 0xF3, # VULGAR FRACTION THREE QUARTERS - 0x00D7: 0x9E, # MULTIPLICATION SIGN - 0x00F7: 0xF6, # DIVISION SIGN - 0x05D0: 0x80, # HEBREW LETTER ALEF - 0x05D1: 0x81, # HEBREW LETTER BET - 0x05D2: 0x82, # HEBREW LETTER GIMEL - 0x05D3: 0x83, # HEBREW LETTER DALET - 0x05D4: 0x84, # HEBREW LETTER HE - 0x05D5: 0x85, # HEBREW LETTER VAV - 0x05D6: 0x86, # HEBREW LETTER ZAYIN - 0x05D7: 0x87, # HEBREW LETTER HET - 0x05D8: 0x88, # HEBREW LETTER TET - 0x05D9: 0x89, # HEBREW LETTER YOD - 0x05DA: 0x8A, # HEBREW LETTER FINAL KAF - 0x05DB: 0x8B, # HEBREW LETTER KAF - 0x05DC: 0x8C, # HEBREW LETTER LAMED - 0x05DD: 0x8D, # HEBREW LETTER FINAL MEM - 0x05DE: 0x8E, # HEBREW LETTER MEM - 0x05DF: 0x8F, # HEBREW LETTER FINAL NUN - 0x05E0: 0x90, # HEBREW LETTER NUN - 0x05E1: 0x91, # HEBREW LETTER SAMEKH - 0x05E2: 0x92, # HEBREW LETTER AYIN - 0x05E3: 0x93, # HEBREW LETTER FINAL PE - 0x05E4: 0x94, # HEBREW LETTER PE - 0x05E5: 0x95, # HEBREW LETTER FINAL TSADI - 0x05E6: 0x96, # HEBREW LETTER TSADI - 0x05E7: 0x97, # HEBREW LETTER QOF - 0x05E8: 0x98, # HEBREW LETTER 
RESH - 0x05E9: 0x99, # HEBREW LETTER SHIN - 0x05EA: 0x9A, # HEBREW LETTER TAV - 0x2017: 0xF2, # DOUBLE LOW LINE - 0x2500: 0xC4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0xB3, # BOX DRAWINGS LIGHT VERTICAL - 0x250C: 0xDA, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0xBF, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0xC0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0xD9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251C: 0xC3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0xB4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252C: 0xC2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0xC1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253C: 0xC5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0xCD, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0xBA, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0xC9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0xBB, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255A: 0xC8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255D: 0xBC, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0xCC, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0xB9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0xCB, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0xCA, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256C: 0xCE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0xDF, # UPPER HALF BLOCK - 0x2584: 0xDC, # LOWER HALF BLOCK - 0x2588: 0xDB, # FULL BLOCK - 0x2591: 0xB0, # LIGHT SHADE - 0x2592: 0xB1, # MEDIUM SHADE - 0x2593: 0xB2, # DARK SHADE - 0x25A0: 0xFE, # BLACK SQUARE + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 
0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # 
LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xFF, # NO-BREAK 
SPACE + 0x00A2: 0xBD, # CENT SIGN + 0x00A3: 0x9C, # POUND SIGN + 0x00A4: 0xCF, # CURRENCY SIGN + 0x00A5: 0xBE, # YEN SIGN + 0x00A6: 0xDD, # BROKEN BAR + 0x00A7: 0xF5, # SECTION SIGN + 0x00A8: 0xF9, # DIAERESIS + 0x00A9: 0xB8, # COPYRIGHT SIGN + 0x00AB: 0xAE, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAA, # NOT SIGN + 0x00AD: 0xF0, # SOFT HYPHEN + 0x00AE: 0xA9, # REGISTERED SIGN + 0x00AF: 0xEE, # MACRON + 0x00B0: 0xF8, # DEGREE SIGN + 0x00B1: 0xF1, # PLUS-MINUS SIGN + 0x00B2: 0xFD, # SUPERSCRIPT TWO + 0x00B3: 0xFC, # SUPERSCRIPT THREE + 0x00B4: 0xEF, # ACUTE ACCENT + 0x00B5: 0xE6, # MICRO SIGN + 0x00B6: 0xF4, # PILCROW SIGN + 0x00B7: 0xFA, # MIDDLE DOT + 0x00B8: 0xF7, # CEDILLA + 0x00B9: 0xFB, # SUPERSCRIPT ONE + 0x00BB: 0xAF, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xAC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xAB, # VULGAR FRACTION ONE HALF + 0x00BE: 0xF3, # VULGAR FRACTION THREE QUARTERS + 0x00D7: 0x9E, # MULTIPLICATION SIGN + 0x00F7: 0xF6, # DIVISION SIGN + 0x05D0: 0x80, # HEBREW LETTER ALEF + 0x05D1: 0x81, # HEBREW LETTER BET + 0x05D2: 0x82, # HEBREW LETTER GIMEL + 0x05D3: 0x83, # HEBREW LETTER DALET + 0x05D4: 0x84, # HEBREW LETTER HE + 0x05D5: 0x85, # HEBREW LETTER VAV + 0x05D6: 0x86, # HEBREW LETTER ZAYIN + 0x05D7: 0x87, # HEBREW LETTER HET + 0x05D8: 0x88, # HEBREW LETTER TET + 0x05D9: 0x89, # HEBREW LETTER YOD + 0x05DA: 0x8A, # HEBREW LETTER FINAL KAF + 0x05DB: 0x8B, # HEBREW LETTER KAF + 0x05DC: 0x8C, # HEBREW LETTER LAMED + 0x05DD: 0x8D, # HEBREW LETTER FINAL MEM + 0x05DE: 0x8E, # HEBREW LETTER MEM + 0x05DF: 0x8F, # HEBREW LETTER FINAL NUN + 0x05E0: 0x90, # HEBREW LETTER NUN + 0x05E1: 0x91, # HEBREW LETTER SAMEKH + 0x05E2: 0x92, # HEBREW LETTER AYIN + 0x05E3: 0x93, # HEBREW LETTER FINAL PE + 0x05E4: 0x94, # HEBREW LETTER PE + 0x05E5: 0x95, # HEBREW LETTER FINAL TSADI + 0x05E6: 0x96, # HEBREW LETTER TSADI + 0x05E7: 0x97, # HEBREW LETTER QOF + 0x05E8: 0x98, # HEBREW LETTER RESH + 0x05E9: 0x99, # HEBREW LETTER SHIN + 0x05EA: 0x9A, 
# HEBREW LETTER TAV + 0x2017: 0xF2, # DOUBLE LOW LINE + 0x2500: 0xC4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0xB3, # BOX DRAWINGS LIGHT VERTICAL + 0x250C: 0xDA, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0xBF, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0xC0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0xD9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251C: 0xC3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0xB4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252C: 0xC2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0xC1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253C: 0xC5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0xCD, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0xBA, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0xC9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0xBB, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255A: 0xC8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255D: 0xBC, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0xCC, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0xB9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0xCB, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0xCA, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256C: 0xCE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0xDF, # UPPER HALF BLOCK + 0x2584: 0xDC, # LOWER HALF BLOCK + 0x2588: 0xDB, # FULL BLOCK + 0x2591: 0xB0, # LIGHT SHADE + 0x2592: 0xB1, # MEDIUM SHADE + 0x2593: 0xB2, # DARK SHADE + 0x25A0: 0xFE, # BLACK SQUARE } - Modified: python/branches/ssize_t/Lib/encodings/cp857.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp857.py (original) +++ python/branches/ssize_t/Lib/encodings/cp857.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,650 +32,650 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 
0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # 
BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: None, # UNDEFINED - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: None, # UNDEFINED - 0x00e8: 0x00d7, # MULTIPLICATION SIGN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: None, # UNDEFINED - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 
0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN 
SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND 
HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00d5: None, # UNDEFINED + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: None, # UNDEFINED + 0x00e8: 0x00d7, # MULTIPLICATION SIGN + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: None, # UNDEFINED + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 
0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' 
# 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - 
u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u0131' # 0x008d -> LATIN SMALL LETTER DOTLESS I - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE 
- u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\u0130' # 0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\u015e' # 0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u015f' # 0x009f -> LATIN SMALL LETTER S WITH CEDILLA - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\u011e' # 0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE - u'\u011f' # 0x00a7 -> LATIN SMALL LETTER G WITH BREVE - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\xae' # 0x00a9 -> REGISTERED SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 
0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xa9' # 0x00b8 -> COPYRIGHT SIGN - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0x00bd -> CENT SIGN - u'\xa5' # 0x00be -> YEN SIGN - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE - u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\xba' # 0x00d0 -> MASCULINE ORDINAL INDICATOR - u'\xaa' # 0x00d1 -> FEMININE ORDINAL INDICATOR - u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\ufffe' # 0x00d5 -> UNDEFINED - u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX 
DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\xa6' # 0x00dd -> BROKEN BAR - u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE - u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\ufffe' # 0x00e7 -> UNDEFINED - u'\xd7' # 0x00e8 -> MULTIPLICATION SIGN - u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE - u'\xff' # 0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xaf' # 0x00ee -> MACRON - u'\xb4' # 0x00ef -> ACUTE ACCENT - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\ufffe' # 0x00f2 -> UNDEFINED - u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0x00f4 -> PILCROW SIGN - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\xb8' # 0x00f7 -> CEDILLA - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\xb9' # 0x00fb -> SUPERSCRIPT ONE - u'\xb3' # 0x00fc -> SUPERSCRIPT THREE - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 
0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u0131' # 0x008d -> LATIN SMALL LETTER DOTLESS I + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\u0130' # 0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u015e' # 0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u015f' # 0x009f -> LATIN SMALL LETTER S WITH CEDILLA + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\u011e' # 0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE + u'\u011f' # 0x00a7 -> LATIN SMALL LETTER G WITH BREVE + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa9' # 0x00b8 -> COPYRIGHT SIGN + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0x00bd -> CENT SIGN + u'\xa5' # 0x00be -> YEN SIGN + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 
-> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE + u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\xba' # 0x00d0 -> MASCULINE ORDINAL INDICATOR + u'\xaa' # 0x00d1 -> FEMININE ORDINAL INDICATOR + u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\ufffe' # 0x00d5 -> UNDEFINED + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\xa6' # 0x00dd -> BROKEN BAR + u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + 
u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\ufffe' # 0x00e7 -> UNDEFINED + u'\xd7' # 0x00e8 -> MULTIPLICATION SIGN + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xff' # 0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xaf' # 0x00ee -> MACRON + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\ufffe' # 0x00f2 -> UNDEFINED + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL 
- 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN 
CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x00bd, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 
0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a5: 0x00be, # YEN SIGN - 0x00a6: 0x00dd, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00a9: 0x00b8, # COPYRIGHT SIGN - 0x00aa: 0x00d1, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00ae: 0x00a9, # REGISTERED SIGN - 0x00af: 0x00ee, # MACRON - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00fc, # SUPERSCRIPT THREE - 0x00b4: 0x00ef, # ACUTE ACCENT - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x00f4, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b8: 0x00f7, # CEDILLA - 0x00b9: 0x00fb, # SUPERSCRIPT ONE - 0x00ba: 0x00d0, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE 
- 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x00e8, # MULTIPLICATION SIGN - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL 
LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0x00ed, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011e: 0x00a6, # LATIN CAPITAL LETTER G WITH BREVE - 0x011f: 0x00a7, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0x0098, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0x008d, # LATIN SMALL LETTER DOTLESS I - 0x015e: 0x009e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0x009f, # LATIN SMALL LETTER S WITH CEDILLA - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 
0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # 
LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # 
LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00bd, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a5: 0x00be, # YEN SIGN + 0x00a6: 0x00dd, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x00b8, # COPYRIGHT SIGN + 0x00aa: 0x00d1, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00af: 0x00ee, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00f7, # CEDILLA + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00ba: 0x00d0, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00b5, # LATIN 
CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00e8, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 
0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ed, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0x00a6, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00a7, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x0098, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x008d, # LATIN SMALL LETTER DOTLESS I + 0x015e: 0x009e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x009f, # LATIN SMALL LETTER S WITH CEDILLA + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # 
BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp860.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp860.py (original) +++ python/branches/ssize_t/Lib/encodings/cp860.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00cd, # LATIN 
CAPITAL LETTER I WITH ACUTE - 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND 
RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 
0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 
0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP 
SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 
0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' 
# 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - 
u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0x0084 -> LATIN SMALL LETTER A WITH TILDE - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xc1' # 0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xca' # 0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xcd' # 0x008b -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xd4' # 0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc3' # 0x008e -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc2' # 0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xc0' # 0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc8' # 0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0x0094 -> LATIN SMALL LETTER O WITH TILDE - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xda' # 0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xcc' # 0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd5' # 0x0099 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u20a7' # 0x009e -> PESETA SIGN - u'\xd3' # 0x009f -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\xd2' # 0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL 
DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX 
DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' 
# 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x0084 -> LATIN SMALL LETTER A WITH TILDE + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xc1' # 0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xca' # 0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xcd' # 0x008b -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xd4' # 0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc3' # 0x008e -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc2' # 0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xc0' # 0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc8' # 0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x0094 -> LATIN SMALL LETTER O WITH TILDE + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xda' # 0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xcc' # 0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd5' # 0x0099 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\xd3' # 0x009f -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xd2' # 0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL 
DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX 
DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 
0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # 
GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 
0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c0: 0x0091, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x0086, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x008f, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x008e, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x0092, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x0089, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cc: 0x0098, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x008b, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0x00a9, # LATIN CAPITAL LETTER O 
WITH GRAVE - 0x00d3: 0x009f, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x008c, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0x0099, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0x0096, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x0084, # LATIN SMALL LETTER A WITH TILDE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x0094, # LATIN SMALL LETTER O WITH TILDE - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # 
PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX 
DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 
0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # 
LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL 
INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x0091, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x0086, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x008f, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x008e, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x0092, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x0089, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cc: 0x0098, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x008b, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00a9, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x009f, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x008c, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x0099, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x0096, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x0084, # LATIN SMALL LETTER A WITH TILDE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00ec: 0x008d, 
# LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x0094, # LATIN SMALL LETTER O WITH TILDE + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT 
DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # 
FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp861.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp861.py (original) +++ python/branches/ssize_t/Lib/encodings/cp861.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x008c: 0x00f0, # LATIN SMALL LETTER ETH - 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00fe, # LATIN SMALL LETTER THORN - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00dd, # LATIN 
CAPITAL LETTER Y WITH ACUTE - 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 
- 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 
0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x008c: 0x00f0, # LATIN SMALL LETTER ETH + 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00fe, # LATIN SMALL LETTER THORN + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS 
LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 
0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xd0' # 0x008b -> LATIN CAPITAL LETTER ETH - u'\xf0' # 0x008c -> LATIN SMALL LETTER ETH - u'\xde' # 0x008d -> LATIN CAPITAL LETTER THORN - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xfe' # 0x0095 -> LATIN SMALL LETTER THORN - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xdd' # 0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xfd' # 0x0098 -> LATIN SMALL LETTER Y WITH ACUTE - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH 
DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xc1' # 0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcd' # 0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xd3' # 0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xda' # 0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 
0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> 
GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' 
# 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xd0' # 0x008b -> LATIN CAPITAL LETTER ETH + u'\xf0' # 0x008c -> LATIN SMALL LETTER ETH + u'\xde' # 0x008d -> LATIN CAPITAL LETTER THORN + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xfe' # 0x0095 -> LATIN SMALL LETTER THORN + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xdd' # 0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xfd' # 0x0098 -> LATIN SMALL LETTER Y WITH ACUTE + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH 
DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xc1' # 0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcd' # 0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xd3' # 0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xda' # 0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 
0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> 
GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 
0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # 
LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 
0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a3: 0x009c, # POUND SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c1: 0x00a4, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00cd: 0x00a5, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d0: 0x008b, # LATIN CAPITAL LETTER ETH - 0x00d3: 0x00a6, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00da: 0x00a7, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0x0097, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0x008d, # LATIN CAPITAL LETTER THORN - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL 
LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00f0: 0x008c, # LATIN SMALL LETTER ETH - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x0098, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0x0095, # LATIN SMALL LETTER THORN - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX 
DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 
0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 
0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 
0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a3: 0x009c, # POUND SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c1: 0x00a4, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 
0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cd: 0x00a5, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d0: 0x008b, # LATIN CAPITAL LETTER ETH + 0x00d3: 0x00a6, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00da: 0x00a7, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x0097, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x008d, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f0: 0x008c, # LATIN SMALL LETTER ETH + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x0098, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x0095, # LATIN SMALL LETTER THORN + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # 
GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS 
DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp862.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp862.py (original) +++ python/branches/ssize_t/Lib/encodings/cp862.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return 
codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x05d0, # HEBREW LETTER ALEF - 0x0081: 0x05d1, # HEBREW LETTER BET - 0x0082: 0x05d2, # HEBREW LETTER GIMEL - 0x0083: 0x05d3, # HEBREW LETTER DALET - 0x0084: 0x05d4, # HEBREW LETTER HE - 0x0085: 0x05d5, # HEBREW LETTER VAV - 0x0086: 0x05d6, # HEBREW LETTER ZAYIN - 0x0087: 0x05d7, # HEBREW LETTER HET - 0x0088: 0x05d8, # HEBREW LETTER TET - 0x0089: 0x05d9, # HEBREW LETTER YOD - 0x008a: 0x05da, # HEBREW LETTER FINAL KAF - 0x008b: 0x05db, # HEBREW LETTER KAF - 0x008c: 0x05dc, # HEBREW LETTER LAMED - 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM - 0x008e: 0x05de, # HEBREW LETTER MEM - 0x008f: 0x05df, # HEBREW LETTER FINAL NUN - 0x0090: 0x05e0, # HEBREW LETTER NUN - 0x0091: 0x05e1, # HEBREW LETTER SAMEKH - 0x0092: 0x05e2, # HEBREW LETTER AYIN - 0x0093: 0x05e3, # HEBREW LETTER FINAL PE - 0x0094: 0x05e4, # HEBREW LETTER PE - 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x0096: 0x05e6, # HEBREW LETTER TSADI - 0x0097: 0x05e7, # HEBREW LETTER QOF - 0x0098: 0x05e8, # HEBREW LETTER RESH - 0x0099: 0x05e9, # HEBREW LETTER SHIN - 0x009a: 0x05ea, # HEBREW LETTER TAV - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR 
FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND 
HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, 
# SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x05d0, # HEBREW LETTER ALEF + 0x0081: 0x05d1, # HEBREW LETTER BET + 0x0082: 0x05d2, # HEBREW LETTER GIMEL + 0x0083: 0x05d3, # HEBREW LETTER DALET + 0x0084: 0x05d4, # HEBREW LETTER HE + 0x0085: 0x05d5, # HEBREW LETTER VAV + 0x0086: 0x05d6, # HEBREW LETTER ZAYIN + 0x0087: 0x05d7, # HEBREW LETTER HET + 0x0088: 0x05d8, # HEBREW LETTER TET + 0x0089: 0x05d9, # HEBREW LETTER YOD + 0x008a: 0x05da, # HEBREW LETTER FINAL KAF + 0x008b: 0x05db, # HEBREW LETTER KAF + 0x008c: 0x05dc, # HEBREW LETTER LAMED + 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM + 0x008e: 0x05de, # HEBREW LETTER MEM + 0x008f: 0x05df, # HEBREW LETTER FINAL NUN + 0x0090: 0x05e0, # HEBREW LETTER NUN + 0x0091: 0x05e1, # HEBREW LETTER SAMEKH + 0x0092: 0x05e2, # HEBREW LETTER AYIN + 0x0093: 0x05e3, # HEBREW LETTER FINAL PE + 0x0094: 0x05e4, # HEBREW LETTER PE + 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x0096: 0x05e6, # HEBREW LETTER TSADI + 0x0097: 0x05e7, # HEBREW LETTER QOF + 0x0098: 0x05e8, # HEBREW LETTER RESH + 0x0099: 0x05e9, # HEBREW LETTER SHIN + 0x009a: 0x05ea, # HEBREW LETTER TAV + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00a5, # YEN SIGN + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # 
VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX 
DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # 
SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' 
# 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - 
u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF - u'\u05d1' # 0x0081 -> HEBREW LETTER BET - u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL - u'\u05d3' # 0x0083 -> HEBREW LETTER DALET - u'\u05d4' # 0x0084 -> HEBREW LETTER HE - u'\u05d5' # 0x0085 -> HEBREW LETTER VAV - u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0x0087 -> HEBREW LETTER HET - u'\u05d8' # 0x0088 -> HEBREW LETTER TET - u'\u05d9' # 0x0089 -> HEBREW LETTER YOD - u'\u05da' # 0x008a -> HEBREW LETTER FINAL KAF - u'\u05db' # 0x008b -> HEBREW LETTER KAF - u'\u05dc' # 0x008c -> HEBREW LETTER LAMED - u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM - u'\u05de' # 0x008e -> HEBREW LETTER MEM - u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0x0090 -> HEBREW LETTER NUN - u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN - u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0x0094 -> HEBREW LETTER PE - 
u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI - u'\u05e7' # 0x0097 -> HEBREW LETTER QOF - u'\u05e8' # 0x0098 -> HEBREW LETTER RESH - u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN - u'\u05ea' # 0x009a -> HEBREW LETTER TAV - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xa5' # 0x009d -> YEN SIGN - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 
0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd 
-> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e 
-> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF + u'\u05d1' # 0x0081 -> HEBREW LETTER BET + u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x0083 -> HEBREW LETTER DALET + u'\u05d4' # 0x0084 -> HEBREW LETTER HE + u'\u05d5' # 0x0085 -> HEBREW LETTER VAV + u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x0087 -> HEBREW LETTER HET + u'\u05d8' # 0x0088 -> HEBREW LETTER TET + u'\u05d9' # 0x0089 -> HEBREW LETTER YOD + u'\u05da' # 0x008a -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x008b -> HEBREW LETTER KAF + u'\u05dc' # 0x008c -> HEBREW LETTER LAMED + u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x008e -> HEBREW LETTER MEM + u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x0090 -> HEBREW LETTER NUN + u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN + u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x0094 -> HEBREW LETTER PE + u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI + u'\u05e7' # 0x0097 -> HEBREW LETTER QOF + u'\u05e8' # 0x0098 -> HEBREW LETTER RESH + u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN + u'\u05ea' # 0x009a -> HEBREW LETTER TAV + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xa5' # 0x009d -> YEN SIGN + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + 
u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS 
LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 
0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 
0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL 
LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a5: 0x009d, # YEN SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # 
LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x05d0: 0x0080, # HEBREW LETTER ALEF - 0x05d1: 0x0081, # HEBREW LETTER BET - 0x05d2: 0x0082, # HEBREW LETTER GIMEL - 0x05d3: 0x0083, # HEBREW LETTER DALET - 0x05d4: 0x0084, # HEBREW LETTER HE - 0x05d5: 0x0085, # HEBREW LETTER VAV - 0x05d6: 0x0086, # HEBREW LETTER ZAYIN - 0x05d7: 0x0087, # HEBREW LETTER HET - 0x05d8: 0x0088, # HEBREW LETTER TET - 0x05d9: 0x0089, # HEBREW LETTER YOD - 0x05da: 0x008a, # HEBREW LETTER FINAL KAF - 0x05db: 0x008b, # HEBREW LETTER KAF - 0x05dc: 0x008c, # HEBREW LETTER LAMED - 0x05dd: 
0x008d, # HEBREW LETTER FINAL MEM - 0x05de: 0x008e, # HEBREW LETTER MEM - 0x05df: 0x008f, # HEBREW LETTER FINAL NUN - 0x05e0: 0x0090, # HEBREW LETTER NUN - 0x05e1: 0x0091, # HEBREW LETTER SAMEKH - 0x05e2: 0x0092, # HEBREW LETTER AYIN - 0x05e3: 0x0093, # HEBREW LETTER FINAL PE - 0x05e4: 0x0094, # HEBREW LETTER PE - 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI - 0x05e6: 0x0096, # HEBREW LETTER TSADI - 0x05e7: 0x0097, # HEBREW LETTER QOF - 0x05e8: 0x0098, # HEBREW LETTER RESH - 0x05e9: 0x0099, # HEBREW LETTER SHIN - 0x05ea: 0x009a, # HEBREW LETTER TAV - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 
0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, 
# BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 
0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # 
LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a5: 0x009d, # YEN SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK 
SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x05d0: 0x0080, # HEBREW LETTER ALEF + 0x05d1: 0x0081, # HEBREW LETTER BET + 0x05d2: 0x0082, # HEBREW LETTER GIMEL + 0x05d3: 0x0083, # HEBREW LETTER DALET + 0x05d4: 0x0084, # HEBREW LETTER HE + 0x05d5: 0x0085, # HEBREW LETTER VAV + 0x05d6: 0x0086, # HEBREW LETTER ZAYIN + 0x05d7: 0x0087, # HEBREW LETTER HET + 0x05d8: 0x0088, # HEBREW LETTER TET + 0x05d9: 0x0089, # HEBREW LETTER YOD + 0x05da: 0x008a, # HEBREW LETTER FINAL KAF + 0x05db: 0x008b, # HEBREW LETTER KAF + 0x05dc: 0x008c, # HEBREW LETTER LAMED + 0x05dd: 0x008d, # HEBREW LETTER FINAL MEM + 0x05de: 0x008e, # HEBREW LETTER MEM + 0x05df: 0x008f, # HEBREW LETTER FINAL NUN + 0x05e0: 0x0090, # HEBREW LETTER NUN + 0x05e1: 0x0091, # HEBREW LETTER SAMEKH + 0x05e2: 0x0092, # HEBREW LETTER AYIN + 0x05e3: 0x0093, # HEBREW LETTER FINAL PE + 0x05e4: 0x0094, # HEBREW LETTER PE + 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x0096, # HEBREW LETTER TSADI + 0x05e7: 0x0097, # HEBREW LETTER QOF + 0x05e8: 0x0098, # HEBREW LETTER RESH + 0x05e9: 0x0099, # HEBREW LETTER SHIN + 0x05ea: 0x009a, # HEBREW LETTER TAV + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT 
UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 
0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp863.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp863.py (original) +++ python/branches/ssize_t/Lib/encodings/cp863.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00b6, # PILCROW SIGN - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x2017, # DOUBLE LOW LINE - 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x008f: 0x00a7, # SECTION SIGN - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x0093: 0x00f4, # LATIN SMALL 
LETTER O WITH CIRCUMFLEX - 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00a4, # CURRENCY SIGN - 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00a6, # BROKEN BAR - 0x00a1: 0x00b4, # ACUTE ACCENT - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00a8, # DIAERESIS - 0x00a5: 0x00b8, # CEDILLA - 0x00a6: 0x00b3, # SUPERSCRIPT THREE - 0x00a7: 0x00af, # MACRON - 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # 
BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA 
- 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00b6, # PILCROW SIGN + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x2017, # DOUBLE LOW LINE + 0x008e: 0x00c0, # LATIN CAPITAL 
LETTER A WITH GRAVE + 0x008f: 0x00a7, # SECTION SIGN + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00a4, # CURRENCY SIGN + 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00a6, # BROKEN BAR + 0x00a1: 0x00b4, # ACUTE ACCENT + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00a8, # DIAERESIS + 0x00a5: 0x00b8, # CEDILLA + 0x00a6: 0x00b3, # SUPERSCRIPT THREE + 0x00a7: 0x00af, # MACRON + 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN 
SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # 
BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> 
CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xc2' # 0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xb6' # 0x0086 -> PILCROW SIGN - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u2017' # 0x008d -> DOUBLE LOW LINE - u'\xc0' # 0x008e -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xa7' # 0x008f -> SECTION SIGN - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xc8' # 0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xca' # 0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xcb' # 0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcf' # 0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xa4' # 0x0098 -> CURRENCY SIGN - u'\xd4' # 0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xdc' 
# 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xdb' # 0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xa6' # 0x00a0 -> BROKEN BAR - u'\xb4' # 0x00a1 -> ACUTE ACCENT - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xa8' # 0x00a4 -> DIAERESIS - u'\xb8' # 0x00a5 -> CEDILLA - u'\xb3' # 0x00a6 -> SUPERSCRIPT THREE - u'\xaf' # 0x00a7 -> MACRON - u'\xce' # 0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xbe' # 0x00ad -> VULGAR FRACTION THREE QUARTERS - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX 
DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - 
u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' 
# 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xc2' # 0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xb6' # 0x0086 -> PILCROW SIGN + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u2017' # 0x008d -> DOUBLE LOW LINE + u'\xc0' # 0x008e -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa7' # 0x008f -> SECTION SIGN + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xc8' # 0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xca' # 0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xcb' # 0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcf' # 0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xa4' # 0x0098 -> CURRENCY SIGN + u'\xd4' # 0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xdc' 
# 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xdb' # 0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xa6' # 0x00a0 -> BROKEN BAR + u'\xb4' # 0x00a1 -> ACUTE ACCENT + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xa8' # 0x00a4 -> DIAERESIS + u'\xb8' # 0x00a5 -> CEDILLA + u'\xb3' # 0x00a6 -> SUPERSCRIPT THREE + u'\xaf' # 0x00a7 -> MACRON + u'\xce' # 0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xbe' # 0x00ad -> VULGAR FRACTION THREE QUARTERS + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX 
DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + 
u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 
0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, 
# LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 
0x00ff, # NO-BREAK SPACE - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x0098, # CURRENCY SIGN - 0x00a6: 0x00a0, # BROKEN BAR - 0x00a7: 0x008f, # SECTION SIGN - 0x00a8: 0x00a4, # DIAERESIS - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00af: 0x00a7, # MACRON - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00a6, # SUPERSCRIPT THREE - 0x00b4: 0x00a1, # ACUTE ACCENT - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x0086, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b8: 0x00a5, # CEDILLA - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00ad, # VULGAR FRACTION THREE QUARTERS - 0x00c0: 0x008e, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c2: 0x0084, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x0091, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x0092, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x0094, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00ce: 0x00a8, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x0095, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d4: 0x0099, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00db: 0x009e, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH 
DIAERESIS - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x2017: 0x008d, # DOUBLE LOW LINE - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # 
LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, 
# DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN 
SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x0098, # CURRENCY SIGN + 0x00a6: 0x00a0, # BROKEN BAR + 0x00a7: 0x008f, # SECTION SIGN + 0x00a8: 0x00a4, # DIAERESIS + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00af: 0x00a7, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00a6, # SUPERSCRIPT THREE + 0x00b4: 0x00a1, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x0086, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00a5, # CEDILLA + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00ad, # VULGAR FRACTION THREE QUARTERS + 0x00c0: 0x008e, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c2: 0x0084, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c7: 0x0080, # LATIN CAPITAL 
LETTER C WITH CEDILLA + 0x00c8: 0x0091, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x0092, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x0094, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00ce: 0x00a8, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x0095, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d4: 0x0099, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00db: 0x009e, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER 
SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x2017: 0x008d, # DOUBLE LOW LINE + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 
0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp864.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp864.py (original) +++ python/branches/ssize_t/Lib/encodings/cp864.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,646 +32,646 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0025: 0x066a, # ARABIC PERCENT SIGN - 0x0080: 0x00b0, # DEGREE SIGN - 0x0081: 0x00b7, # MIDDLE DOT - 0x0082: 0x2219, # BULLET OPERATOR - 0x0083: 0x221a, # SQUARE ROOT - 0x0084: 0x2592, # 
MEDIUM SHADE - 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL - 0x0086: 0x2502, # FORMS LIGHT VERTICAL - 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL - 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT - 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL - 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT - 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL - 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT - 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT - 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT - 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT - 0x0090: 0x03b2, # GREEK SMALL BETA - 0x0091: 0x221e, # INFINITY - 0x0092: 0x03c6, # GREEK SMALL PHI - 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN - 0x0094: 0x00bd, # FRACTION 1/2 - 0x0095: 0x00bc, # FRACTION 1/4 - 0x0096: 0x2248, # ALMOST EQUAL TO - 0x0097: 0x00ab, # LEFT POINTING GUILLEMET - 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET - 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED - 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM - 0x009f: None, # UNDEFINED - 0x00a1: 0x00ad, # SOFT HYPHEN - 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - 0x00a6: None, # UNDEFINED - 0x00a7: None, # UNDEFINED - 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM - 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM - 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM - 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM - 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM - 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO - 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE - 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO - 0x00b3: 
0x0663, # ARABIC-INDIC DIGIT THREE - 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR - 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE - 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX - 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN - 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT - 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE - 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM - 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM - 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: 0x00a2, # CENT SIGN - 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM - 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM - 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM - 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM - 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM - 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM - 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM - 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM - 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM - 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM - 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM - 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM - 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM - 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM - 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM - 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM - 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM - 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM - 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM - 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM - 
0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM - 0x00db: 0x00a6, # BROKEN VERTICAL BAR - 0x00dc: 0x00ac, # NOT SIGN - 0x00dd: 0x00f7, # DIVISION SIGN - 0x00de: 0x00d7, # MULTIPLICATION SIGN - 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM - 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM - 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM - 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM - 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM - 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM - 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM - 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM - 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM - 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM - 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM - 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM - 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM - 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM - 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM - 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM - 0x00f1: 0x0651, # ARABIC SHADDAH - 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM - 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM - 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM - 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM - 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM - 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM - 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM - 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM - 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM - 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: None, # UNDEFINED + 0x0025: 0x066a, # ARABIC PERCENT SIGN + 0x0080: 0x00b0, # DEGREE SIGN + 0x0081: 0x00b7, # MIDDLE DOT + 0x0082: 
0x2219, # BULLET OPERATOR + 0x0083: 0x221a, # SQUARE ROOT + 0x0084: 0x2592, # MEDIUM SHADE + 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL + 0x0086: 0x2502, # FORMS LIGHT VERTICAL + 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL + 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT + 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL + 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT + 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL + 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT + 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT + 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT + 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT + 0x0090: 0x03b2, # GREEK SMALL BETA + 0x0091: 0x221e, # INFINITY + 0x0092: 0x03c6, # GREEK SMALL PHI + 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN + 0x0094: 0x00bd, # FRACTION 1/2 + 0x0095: 0x00bc, # FRACTION 1/4 + 0x0096: 0x2248, # ALMOST EQUAL TO + 0x0097: 0x00ab, # LEFT POINTING GUILLEMET + 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET + 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + 0x009b: None, # UNDEFINED + 0x009c: None, # UNDEFINED + 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM + 0x009f: None, # UNDEFINED + 0x00a1: 0x00ad, # SOFT HYPHEN + 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + 0x00a6: None, # UNDEFINED + 0x00a7: None, # UNDEFINED + 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM + 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM + 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM + 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM + 0x00ac: 0x060c, # ARABIC COMMA + 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM + 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM + 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM + 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO + 0x00b1: 0x0661, # 
ARABIC-INDIC DIGIT ONE + 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO + 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE + 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR + 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE + 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX + 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN + 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT + 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE + 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM + 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM + 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM + 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: 0x00a2, # CENT SIGN + 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM + 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM + 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM + 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM + 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM + 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM + 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM + 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM + 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM + 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM + 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM + 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM + 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM + 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM + 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM + 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM + 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM + 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM + 0x00d8: 0xfec5, # ARABIC LETTER 
ZAH ISOLATED FORM + 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM + 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM + 0x00db: 0x00a6, # BROKEN VERTICAL BAR + 0x00dc: 0x00ac, # NOT SIGN + 0x00dd: 0x00f7, # DIVISION SIGN + 0x00de: 0x00d7, # MULTIPLICATION SIGN + 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM + 0x00e0: 0x0640, # ARABIC TATWEEL + 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM + 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM + 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM + 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM + 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM + 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM + 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM + 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM + 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM + 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM + 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM + 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM + 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM + 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM + 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM + 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM + 0x00f1: 0x0651, # ARABIC SHADDAH + 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM + 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM + 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM + 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM + 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM + 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM + 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM + 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM + 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM + 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: None, # UNDEFINED }) ### Decoding Table decoding_table = ( 
- u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'\u066a' # 0x0025 -> ARABIC PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' 
# 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - 
u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xb0' # 0x0080 -> DEGREE SIGN - u'\xb7' # 0x0081 -> MIDDLE DOT - u'\u2219' # 0x0082 -> BULLET OPERATOR - u'\u221a' # 0x0083 -> SQUARE ROOT - u'\u2592' # 0x0084 -> MEDIUM SHADE - u'\u2500' # 0x0085 -> FORMS LIGHT HORIZONTAL - u'\u2502' # 0x0086 -> FORMS LIGHT VERTICAL - u'\u253c' # 0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL - u'\u2524' # 0x0088 -> FORMS LIGHT VERTICAL AND LEFT - u'\u252c' # 0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x008a -> FORMS LIGHT VERTICAL AND RIGHT - u'\u2534' # 0x008b -> FORMS LIGHT UP AND HORIZONTAL - u'\u2510' # 0x008c -> FORMS LIGHT DOWN AND LEFT - u'\u250c' # 0x008d -> FORMS LIGHT DOWN AND RIGHT - u'\u2514' # 0x008e -> FORMS LIGHT UP AND RIGHT - u'\u2518' # 0x008f -> FORMS LIGHT UP AND LEFT - u'\u03b2' # 0x0090 -> GREEK SMALL BETA - u'\u221e' # 0x0091 -> INFINITY - u'\u03c6' # 0x0092 -> GREEK SMALL PHI - u'\xb1' # 0x0093 -> PLUS-OR-MINUS SIGN - u'\xbd' # 
0x0094 -> FRACTION 1/2 - u'\xbc' # 0x0095 -> FRACTION 1/4 - u'\u2248' # 0x0096 -> ALMOST EQUAL TO - u'\xab' # 0x0097 -> LEFT POINTING GUILLEMET - u'\xbb' # 0x0098 -> RIGHT POINTING GUILLEMET - u'\ufef7' # 0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - u'\ufef8' # 0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - u'\ufffe' # 0x009b -> UNDEFINED - u'\ufffe' # 0x009c -> UNDEFINED - u'\ufefb' # 0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - u'\ufefc' # 0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM - u'\ufffe' # 0x009f -> UNDEFINED - u'\xa0' # 0x00a0 -> NON-BREAKING SPACE - u'\xad' # 0x00a1 -> SOFT HYPHEN - u'\ufe82' # 0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - u'\xa3' # 0x00a3 -> POUND SIGN - u'\xa4' # 0x00a4 -> CURRENCY SIGN - u'\ufe84' # 0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - u'\ufffe' # 0x00a6 -> UNDEFINED - u'\ufffe' # 0x00a7 -> UNDEFINED - u'\ufe8e' # 0x00a8 -> ARABIC LETTER ALEF FINAL FORM - u'\ufe8f' # 0x00a9 -> ARABIC LETTER BEH ISOLATED FORM - u'\ufe95' # 0x00aa -> ARABIC LETTER TEH ISOLATED FORM - u'\ufe99' # 0x00ab -> ARABIC LETTER THEH ISOLATED FORM - u'\u060c' # 0x00ac -> ARABIC COMMA - u'\ufe9d' # 0x00ad -> ARABIC LETTER JEEM ISOLATED FORM - u'\ufea1' # 0x00ae -> ARABIC LETTER HAH ISOLATED FORM - u'\ufea5' # 0x00af -> ARABIC LETTER KHAH ISOLATED FORM - u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO - u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE - u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO - u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE - u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR - u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE - u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX - u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN - u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT - u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE - u'\ufed1' # 0x00ba -> ARABIC LETTER FEH ISOLATED FORM - u'\u061b' # 0x00bb -> ARABIC SEMICOLON - u'\ufeb1' # 0x00bc -> ARABIC LETTER SEEN 
ISOLATED FORM - u'\ufeb5' # 0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM - u'\ufeb9' # 0x00be -> ARABIC LETTER SAD ISOLATED FORM - u'\u061f' # 0x00bf -> ARABIC QUESTION MARK - u'\xa2' # 0x00c0 -> CENT SIGN - u'\ufe80' # 0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM - u'\ufe81' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - u'\ufe83' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - u'\ufe85' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - u'\ufeca' # 0x00c5 -> ARABIC LETTER AIN FINAL FORM - u'\ufe8b' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - u'\ufe8d' # 0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM - u'\ufe91' # 0x00c8 -> ARABIC LETTER BEH INITIAL FORM - u'\ufe93' # 0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM - u'\ufe97' # 0x00ca -> ARABIC LETTER TEH INITIAL FORM - u'\ufe9b' # 0x00cb -> ARABIC LETTER THEH INITIAL FORM - u'\ufe9f' # 0x00cc -> ARABIC LETTER JEEM INITIAL FORM - u'\ufea3' # 0x00cd -> ARABIC LETTER HAH INITIAL FORM - u'\ufea7' # 0x00ce -> ARABIC LETTER KHAH INITIAL FORM - u'\ufea9' # 0x00cf -> ARABIC LETTER DAL ISOLATED FORM - u'\ufeab' # 0x00d0 -> ARABIC LETTER THAL ISOLATED FORM - u'\ufead' # 0x00d1 -> ARABIC LETTER REH ISOLATED FORM - u'\ufeaf' # 0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM - u'\ufeb3' # 0x00d3 -> ARABIC LETTER SEEN INITIAL FORM - u'\ufeb7' # 0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM - u'\ufebb' # 0x00d5 -> ARABIC LETTER SAD INITIAL FORM - u'\ufebf' # 0x00d6 -> ARABIC LETTER DAD INITIAL FORM - u'\ufec1' # 0x00d7 -> ARABIC LETTER TAH ISOLATED FORM - u'\ufec5' # 0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM - u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM - u'\ufecf' # 0x00da -> ARABIC LETTER GHAIN INITIAL FORM - u'\xa6' # 0x00db -> BROKEN VERTICAL BAR - u'\xac' # 0x00dc -> NOT SIGN - u'\xf7' # 0x00dd -> DIVISION SIGN - u'\xd7' # 0x00de -> MULTIPLICATION SIGN - u'\ufec9' # 0x00df -> ARABIC LETTER AIN ISOLATED FORM - u'\u0640' # 0x00e0 -> ARABIC TATWEEL - u'\ufed3' # 
0x00e1 -> ARABIC LETTER FEH INITIAL FORM - u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM - u'\ufedb' # 0x00e3 -> ARABIC LETTER KAF INITIAL FORM - u'\ufedf' # 0x00e4 -> ARABIC LETTER LAM INITIAL FORM - u'\ufee3' # 0x00e5 -> ARABIC LETTER MEEM INITIAL FORM - u'\ufee7' # 0x00e6 -> ARABIC LETTER NOON INITIAL FORM - u'\ufeeb' # 0x00e7 -> ARABIC LETTER HEH INITIAL FORM - u'\ufeed' # 0x00e8 -> ARABIC LETTER WAW ISOLATED FORM - u'\ufeef' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM - u'\ufef3' # 0x00ea -> ARABIC LETTER YEH INITIAL FORM - u'\ufebd' # 0x00eb -> ARABIC LETTER DAD ISOLATED FORM - u'\ufecc' # 0x00ec -> ARABIC LETTER AIN MEDIAL FORM - u'\ufece' # 0x00ed -> ARABIC LETTER GHAIN FINAL FORM - u'\ufecd' # 0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM - u'\ufee1' # 0x00ef -> ARABIC LETTER MEEM ISOLATED FORM - u'\ufe7d' # 0x00f0 -> ARABIC SHADDA MEDIAL FORM - u'\u0651' # 0x00f1 -> ARABIC SHADDAH - u'\ufee5' # 0x00f2 -> ARABIC LETTER NOON ISOLATED FORM - u'\ufee9' # 0x00f3 -> ARABIC LETTER HEH ISOLATED FORM - u'\ufeec' # 0x00f4 -> ARABIC LETTER HEH MEDIAL FORM - u'\ufef0' # 0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM - u'\ufef2' # 0x00f6 -> ARABIC LETTER YEH FINAL FORM - u'\ufed0' # 0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM - u'\ufed5' # 0x00f8 -> ARABIC LETTER QAF ISOLATED FORM - u'\ufef5' # 0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - u'\ufef6' # 0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - u'\ufedd' # 0x00fb -> ARABIC LETTER LAM ISOLATED FORM - u'\ufed9' # 0x00fc -> ARABIC LETTER KAF ISOLATED FORM - u'\ufef1' # 0x00fd -> ARABIC LETTER YEH ISOLATED FORM - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\ufffe' # 0x00ff -> UNDEFINED + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 
0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'\u066a' # 0x0025 -> ARABIC PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xb0' # 0x0080 -> DEGREE SIGN + u'\xb7' # 0x0081 -> MIDDLE DOT + u'\u2219' # 0x0082 -> BULLET OPERATOR + u'\u221a' # 0x0083 -> SQUARE ROOT + u'\u2592' # 0x0084 -> MEDIUM SHADE + u'\u2500' # 0x0085 -> FORMS LIGHT HORIZONTAL + u'\u2502' # 0x0086 -> FORMS LIGHT VERTICAL + u'\u253c' # 0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL + u'\u2524' # 0x0088 -> FORMS LIGHT VERTICAL AND LEFT + u'\u252c' # 0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x008a -> FORMS LIGHT VERTICAL AND RIGHT + u'\u2534' # 0x008b -> FORMS LIGHT UP AND HORIZONTAL + u'\u2510' # 0x008c -> FORMS LIGHT DOWN AND LEFT + u'\u250c' # 0x008d -> FORMS LIGHT DOWN AND RIGHT + u'\u2514' # 0x008e -> FORMS LIGHT UP AND RIGHT + u'\u2518' # 0x008f -> FORMS LIGHT UP AND LEFT + u'\u03b2' # 0x0090 -> GREEK SMALL BETA + u'\u221e' # 0x0091 -> INFINITY + u'\u03c6' # 0x0092 -> GREEK SMALL PHI + u'\xb1' # 0x0093 -> PLUS-OR-MINUS SIGN + u'\xbd' # 0x0094 -> FRACTION 1/2 + u'\xbc' # 0x0095 -> FRACTION 1/4 + u'\u2248' # 0x0096 -> ALMOST EQUAL TO + u'\xab' # 0x0097 -> LEFT POINTING GUILLEMET + u'\xbb' # 0x0098 -> RIGHT POINTING GUILLEMET + u'\ufef7' # 0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + u'\ufef8' # 0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + u'\ufffe' # 0x009b -> UNDEFINED + u'\ufffe' # 0x009c -> UNDEFINED + u'\ufefb' # 0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + u'\ufefc' # 
0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NON-BREAKING SPACE + u'\xad' # 0x00a1 -> SOFT HYPHEN + u'\ufe82' # 0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\ufe84' # 0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + u'\ufffe' # 0x00a6 -> UNDEFINED + u'\ufffe' # 0x00a7 -> UNDEFINED + u'\ufe8e' # 0x00a8 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8f' # 0x00a9 -> ARABIC LETTER BEH ISOLATED FORM + u'\ufe95' # 0x00aa -> ARABIC LETTER TEH ISOLATED FORM + u'\ufe99' # 0x00ab -> ARABIC LETTER THEH ISOLATED FORM + u'\u060c' # 0x00ac -> ARABIC COMMA + u'\ufe9d' # 0x00ad -> ARABIC LETTER JEEM ISOLATED FORM + u'\ufea1' # 0x00ae -> ARABIC LETTER HAH ISOLATED FORM + u'\ufea5' # 0x00af -> ARABIC LETTER KHAH ISOLATED FORM + u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO + u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE + u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO + u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE + u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR + u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE + u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX + u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN + u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT + u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE + u'\ufed1' # 0x00ba -> ARABIC LETTER FEH ISOLATED FORM + u'\u061b' # 0x00bb -> ARABIC SEMICOLON + u'\ufeb1' # 0x00bc -> ARABIC LETTER SEEN ISOLATED FORM + u'\ufeb5' # 0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM + u'\ufeb9' # 0x00be -> ARABIC LETTER SAD ISOLATED FORM + u'\u061f' # 0x00bf -> ARABIC QUESTION MARK + u'\xa2' # 0x00c0 -> CENT SIGN + u'\ufe80' # 0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM + u'\ufe81' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufe83' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + u'\ufe85' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + u'\ufeca' # 0x00c5 -> 
ARABIC LETTER AIN FINAL FORM + u'\ufe8b' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + u'\ufe8d' # 0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM + u'\ufe91' # 0x00c8 -> ARABIC LETTER BEH INITIAL FORM + u'\ufe93' # 0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM + u'\ufe97' # 0x00ca -> ARABIC LETTER TEH INITIAL FORM + u'\ufe9b' # 0x00cb -> ARABIC LETTER THEH INITIAL FORM + u'\ufe9f' # 0x00cc -> ARABIC LETTER JEEM INITIAL FORM + u'\ufea3' # 0x00cd -> ARABIC LETTER HAH INITIAL FORM + u'\ufea7' # 0x00ce -> ARABIC LETTER KHAH INITIAL FORM + u'\ufea9' # 0x00cf -> ARABIC LETTER DAL ISOLATED FORM + u'\ufeab' # 0x00d0 -> ARABIC LETTER THAL ISOLATED FORM + u'\ufead' # 0x00d1 -> ARABIC LETTER REH ISOLATED FORM + u'\ufeaf' # 0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM + u'\ufeb3' # 0x00d3 -> ARABIC LETTER SEEN INITIAL FORM + u'\ufeb7' # 0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM + u'\ufebb' # 0x00d5 -> ARABIC LETTER SAD INITIAL FORM + u'\ufebf' # 0x00d6 -> ARABIC LETTER DAD INITIAL FORM + u'\ufec1' # 0x00d7 -> ARABIC LETTER TAH ISOLATED FORM + u'\ufec5' # 0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM + u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM + u'\ufecf' # 0x00da -> ARABIC LETTER GHAIN INITIAL FORM + u'\xa6' # 0x00db -> BROKEN VERTICAL BAR + u'\xac' # 0x00dc -> NOT SIGN + u'\xf7' # 0x00dd -> DIVISION SIGN + u'\xd7' # 0x00de -> MULTIPLICATION SIGN + u'\ufec9' # 0x00df -> ARABIC LETTER AIN ISOLATED FORM + u'\u0640' # 0x00e0 -> ARABIC TATWEEL + u'\ufed3' # 0x00e1 -> ARABIC LETTER FEH INITIAL FORM + u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM + u'\ufedb' # 0x00e3 -> ARABIC LETTER KAF INITIAL FORM + u'\ufedf' # 0x00e4 -> ARABIC LETTER LAM INITIAL FORM + u'\ufee3' # 0x00e5 -> ARABIC LETTER MEEM INITIAL FORM + u'\ufee7' # 0x00e6 -> ARABIC LETTER NOON INITIAL FORM + u'\ufeeb' # 0x00e7 -> ARABIC LETTER HEH INITIAL FORM + u'\ufeed' # 0x00e8 -> ARABIC LETTER WAW ISOLATED FORM + u'\ufeef' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM + u'\ufef3' # 
0x00ea -> ARABIC LETTER YEH INITIAL FORM + u'\ufebd' # 0x00eb -> ARABIC LETTER DAD ISOLATED FORM + u'\ufecc' # 0x00ec -> ARABIC LETTER AIN MEDIAL FORM + u'\ufece' # 0x00ed -> ARABIC LETTER GHAIN FINAL FORM + u'\ufecd' # 0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM + u'\ufee1' # 0x00ef -> ARABIC LETTER MEEM ISOLATED FORM + u'\ufe7d' # 0x00f0 -> ARABIC SHADDA MEDIAL FORM + u'\u0651' # 0x00f1 -> ARABIC SHADDAH + u'\ufee5' # 0x00f2 -> ARABIC LETTER NOON ISOLATED FORM + u'\ufee9' # 0x00f3 -> ARABIC LETTER HEH ISOLATED FORM + u'\ufeec' # 0x00f4 -> ARABIC LETTER HEH MEDIAL FORM + u'\ufef0' # 0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM + u'\ufef2' # 0x00f6 -> ARABIC LETTER YEH FINAL FORM + u'\ufed0' # 0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM + u'\ufed5' # 0x00f8 -> ARABIC LETTER QAF ISOLATED FORM + u'\ufef5' # 0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufef6' # 0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + u'\ufedd' # 0x00fb -> ARABIC LETTER LAM ISOLATED FORM + u'\ufed9' # 0x00fc -> ARABIC LETTER KAF ISOLATED FORM + u'\ufef1' # 0x00fd -> ARABIC LETTER YEH ISOLATED FORM + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\ufffe' # 0x00ff -> UNDEFINED ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE 
ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN 
CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00a0, # NON-BREAKING SPACE - 0x00a2: 0x00c0, # 
CENT SIGN - 0x00a3: 0x00a3, # POUND SIGN - 0x00a4: 0x00a4, # CURRENCY SIGN - 0x00a6: 0x00db, # BROKEN VERTICAL BAR - 0x00ab: 0x0097, # LEFT POINTING GUILLEMET - 0x00ac: 0x00dc, # NOT SIGN - 0x00ad: 0x00a1, # SOFT HYPHEN - 0x00b0: 0x0080, # DEGREE SIGN - 0x00b1: 0x0093, # PLUS-OR-MINUS SIGN - 0x00b7: 0x0081, # MIDDLE DOT - 0x00bb: 0x0098, # RIGHT POINTING GUILLEMET - 0x00bc: 0x0095, # FRACTION 1/4 - 0x00bd: 0x0094, # FRACTION 1/2 - 0x00d7: 0x00de, # MULTIPLICATION SIGN - 0x00f7: 0x00dd, # DIVISION SIGN - 0x03b2: 0x0090, # GREEK SMALL BETA - 0x03c6: 0x0092, # GREEK SMALL PHI - 0x060c: 0x00ac, # ARABIC COMMA - 0x061b: 0x00bb, # ARABIC SEMICOLON - 0x061f: 0x00bf, # ARABIC QUESTION MARK - 0x0640: 0x00e0, # ARABIC TATWEEL - 0x0651: 0x00f1, # ARABIC SHADDAH - 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO - 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE - 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO - 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE - 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR - 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE - 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX - 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN - 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT - 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE - 0x066a: 0x0025, # ARABIC PERCENT SIGN - 0x2219: 0x0082, # BULLET OPERATOR - 0x221a: 0x0083, # SQUARE ROOT - 0x221e: 0x0091, # INFINITY - 0x2248: 0x0096, # ALMOST EQUAL TO - 0x2500: 0x0085, # FORMS LIGHT HORIZONTAL - 0x2502: 0x0086, # FORMS LIGHT VERTICAL - 0x250c: 0x008d, # FORMS LIGHT DOWN AND RIGHT - 0x2510: 0x008c, # FORMS LIGHT DOWN AND LEFT - 0x2514: 0x008e, # FORMS LIGHT UP AND RIGHT - 0x2518: 0x008f, # FORMS LIGHT UP AND LEFT - 0x251c: 0x008a, # FORMS LIGHT VERTICAL AND RIGHT - 0x2524: 0x0088, # FORMS LIGHT VERTICAL AND LEFT - 0x252c: 0x0089, # FORMS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x008b, # FORMS LIGHT UP AND HORIZONTAL - 0x253c: 0x0087, # FORMS LIGHT VERTICAL AND HORIZONTAL - 0x2592: 0x0084, # MEDIUM SHADE - 0x25a0: 0x00fe, # BLACK SQUARE - 0xfe7d: 0x00f0, # ARABIC SHADDA 
MEDIAL FORM - 0xfe80: 0x00c1, # ARABIC LETTER HAMZA ISOLATED FORM - 0xfe81: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0xfe82: 0x00a2, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - 0xfe83: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - 0xfe84: 0x00a5, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - 0xfe85: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0xfe8b: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0xfe8d: 0x00c7, # ARABIC LETTER ALEF ISOLATED FORM - 0xfe8e: 0x00a8, # ARABIC LETTER ALEF FINAL FORM - 0xfe8f: 0x00a9, # ARABIC LETTER BEH ISOLATED FORM - 0xfe91: 0x00c8, # ARABIC LETTER BEH INITIAL FORM - 0xfe93: 0x00c9, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0xfe95: 0x00aa, # ARABIC LETTER TEH ISOLATED FORM - 0xfe97: 0x00ca, # ARABIC LETTER TEH INITIAL FORM - 0xfe99: 0x00ab, # ARABIC LETTER THEH ISOLATED FORM - 0xfe9b: 0x00cb, # ARABIC LETTER THEH INITIAL FORM - 0xfe9d: 0x00ad, # ARABIC LETTER JEEM ISOLATED FORM - 0xfe9f: 0x00cc, # ARABIC LETTER JEEM INITIAL FORM - 0xfea1: 0x00ae, # ARABIC LETTER HAH ISOLATED FORM - 0xfea3: 0x00cd, # ARABIC LETTER HAH INITIAL FORM - 0xfea5: 0x00af, # ARABIC LETTER KHAH ISOLATED FORM - 0xfea7: 0x00ce, # ARABIC LETTER KHAH INITIAL FORM - 0xfea9: 0x00cf, # ARABIC LETTER DAL ISOLATED FORM - 0xfeab: 0x00d0, # ARABIC LETTER THAL ISOLATED FORM - 0xfead: 0x00d1, # ARABIC LETTER REH ISOLATED FORM - 0xfeaf: 0x00d2, # ARABIC LETTER ZAIN ISOLATED FORM - 0xfeb1: 0x00bc, # ARABIC LETTER SEEN ISOLATED FORM - 0xfeb3: 0x00d3, # ARABIC LETTER SEEN INITIAL FORM - 0xfeb5: 0x00bd, # ARABIC LETTER SHEEN ISOLATED FORM - 0xfeb7: 0x00d4, # ARABIC LETTER SHEEN INITIAL FORM - 0xfeb9: 0x00be, # ARABIC LETTER SAD ISOLATED FORM - 0xfebb: 0x00d5, # ARABIC LETTER SAD INITIAL FORM - 0xfebd: 0x00eb, # ARABIC LETTER DAD ISOLATED FORM - 0xfebf: 0x00d6, # ARABIC LETTER DAD INITIAL FORM - 0xfec1: 0x00d7, # ARABIC LETTER TAH ISOLATED FORM - 0xfec5: 0x00d8, # ARABIC LETTER ZAH ISOLATED FORM - 
0xfec9: 0x00df, # ARABIC LETTER AIN ISOLATED FORM - 0xfeca: 0x00c5, # ARABIC LETTER AIN FINAL FORM - 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM - 0xfecc: 0x00ec, # ARABIC LETTER AIN MEDIAL FORM - 0xfecd: 0x00ee, # ARABIC LETTER GHAIN ISOLATED FORM - 0xfece: 0x00ed, # ARABIC LETTER GHAIN FINAL FORM - 0xfecf: 0x00da, # ARABIC LETTER GHAIN INITIAL FORM - 0xfed0: 0x00f7, # ARABIC LETTER GHAIN MEDIAL FORM - 0xfed1: 0x00ba, # ARABIC LETTER FEH ISOLATED FORM - 0xfed3: 0x00e1, # ARABIC LETTER FEH INITIAL FORM - 0xfed5: 0x00f8, # ARABIC LETTER QAF ISOLATED FORM - 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM - 0xfed9: 0x00fc, # ARABIC LETTER KAF ISOLATED FORM - 0xfedb: 0x00e3, # ARABIC LETTER KAF INITIAL FORM - 0xfedd: 0x00fb, # ARABIC LETTER LAM ISOLATED FORM - 0xfedf: 0x00e4, # ARABIC LETTER LAM INITIAL FORM - 0xfee1: 0x00ef, # ARABIC LETTER MEEM ISOLATED FORM - 0xfee3: 0x00e5, # ARABIC LETTER MEEM INITIAL FORM - 0xfee5: 0x00f2, # ARABIC LETTER NOON ISOLATED FORM - 0xfee7: 0x00e6, # ARABIC LETTER NOON INITIAL FORM - 0xfee9: 0x00f3, # ARABIC LETTER HEH ISOLATED FORM - 0xfeeb: 0x00e7, # ARABIC LETTER HEH INITIAL FORM - 0xfeec: 0x00f4, # ARABIC LETTER HEH MEDIAL FORM - 0xfeed: 0x00e8, # ARABIC LETTER WAW ISOLATED FORM - 0xfeef: 0x00e9, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM - 0xfef0: 0x00f5, # ARABIC LETTER ALEF MAKSURA FINAL FORM - 0xfef1: 0x00fd, # ARABIC LETTER YEH ISOLATED FORM - 0xfef2: 0x00f6, # ARABIC LETTER YEH FINAL FORM - 0xfef3: 0x00ea, # ARABIC LETTER YEH INITIAL FORM - 0xfef5: 0x00f9, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - 0xfef6: 0x00fa, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - 0xfef7: 0x0099, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - 0xfef8: 0x009a, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - 0xfefb: 0x009d, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - 0xfefc: 0x009e, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM -} \ No newline at end of file + 0x0000: 0x0000, 
# NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 
0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN 
SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NON-BREAKING SPACE + 0x00a2: 0x00c0, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00db, # BROKEN VERTICAL BAR + 0x00ab: 0x0097, # LEFT POINTING GUILLEMET + 0x00ac: 0x00dc, # NOT SIGN + 0x00ad: 0x00a1, # SOFT HYPHEN + 0x00b0: 0x0080, # DEGREE SIGN + 0x00b1: 0x0093, # PLUS-OR-MINUS SIGN + 0x00b7: 0x0081, # MIDDLE DOT + 0x00bb: 0x0098, # RIGHT POINTING GUILLEMET + 0x00bc: 0x0095, # FRACTION 1/4 + 0x00bd: 0x0094, # FRACTION 1/2 + 0x00d7: 0x00de, # MULTIPLICATION SIGN + 0x00f7: 0x00dd, # DIVISION SIGN + 0x03b2: 0x0090, # GREEK SMALL BETA + 0x03c6: 0x0092, # GREEK SMALL PHI + 0x060c: 0x00ac, # ARABIC COMMA + 0x061b: 0x00bb, # ARABIC SEMICOLON + 0x061f: 0x00bf, # ARABIC QUESTION MARK + 0x0640: 0x00e0, # ARABIC TATWEEL + 0x0651: 0x00f1, # ARABIC SHADDAH + 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO + 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE + 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO + 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE + 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR + 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE + 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX + 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN + 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT + 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE + 0x066a: 0x0025, # ARABIC PERCENT SIGN + 0x2219: 0x0082, # BULLET 
OPERATOR + 0x221a: 0x0083, # SQUARE ROOT + 0x221e: 0x0091, # INFINITY + 0x2248: 0x0096, # ALMOST EQUAL TO + 0x2500: 0x0085, # FORMS LIGHT HORIZONTAL + 0x2502: 0x0086, # FORMS LIGHT VERTICAL + 0x250c: 0x008d, # FORMS LIGHT DOWN AND RIGHT + 0x2510: 0x008c, # FORMS LIGHT DOWN AND LEFT + 0x2514: 0x008e, # FORMS LIGHT UP AND RIGHT + 0x2518: 0x008f, # FORMS LIGHT UP AND LEFT + 0x251c: 0x008a, # FORMS LIGHT VERTICAL AND RIGHT + 0x2524: 0x0088, # FORMS LIGHT VERTICAL AND LEFT + 0x252c: 0x0089, # FORMS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x008b, # FORMS LIGHT UP AND HORIZONTAL + 0x253c: 0x0087, # FORMS LIGHT VERTICAL AND HORIZONTAL + 0x2592: 0x0084, # MEDIUM SHADE + 0x25a0: 0x00fe, # BLACK SQUARE + 0xfe7d: 0x00f0, # ARABIC SHADDA MEDIAL FORM + 0xfe80: 0x00c1, # ARABIC LETTER HAMZA ISOLATED FORM + 0xfe81: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0xfe82: 0x00a2, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + 0xfe83: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + 0xfe84: 0x00a5, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + 0xfe85: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0xfe8b: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0xfe8d: 0x00c7, # ARABIC LETTER ALEF ISOLATED FORM + 0xfe8e: 0x00a8, # ARABIC LETTER ALEF FINAL FORM + 0xfe8f: 0x00a9, # ARABIC LETTER BEH ISOLATED FORM + 0xfe91: 0x00c8, # ARABIC LETTER BEH INITIAL FORM + 0xfe93: 0x00c9, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0xfe95: 0x00aa, # ARABIC LETTER TEH ISOLATED FORM + 0xfe97: 0x00ca, # ARABIC LETTER TEH INITIAL FORM + 0xfe99: 0x00ab, # ARABIC LETTER THEH ISOLATED FORM + 0xfe9b: 0x00cb, # ARABIC LETTER THEH INITIAL FORM + 0xfe9d: 0x00ad, # ARABIC LETTER JEEM ISOLATED FORM + 0xfe9f: 0x00cc, # ARABIC LETTER JEEM INITIAL FORM + 0xfea1: 0x00ae, # ARABIC LETTER HAH ISOLATED FORM + 0xfea3: 0x00cd, # ARABIC LETTER HAH INITIAL FORM + 0xfea5: 0x00af, # ARABIC LETTER KHAH ISOLATED FORM + 0xfea7: 0x00ce, # ARABIC LETTER KHAH INITIAL FORM + 
0xfea9: 0x00cf, # ARABIC LETTER DAL ISOLATED FORM + 0xfeab: 0x00d0, # ARABIC LETTER THAL ISOLATED FORM + 0xfead: 0x00d1, # ARABIC LETTER REH ISOLATED FORM + 0xfeaf: 0x00d2, # ARABIC LETTER ZAIN ISOLATED FORM + 0xfeb1: 0x00bc, # ARABIC LETTER SEEN ISOLATED FORM + 0xfeb3: 0x00d3, # ARABIC LETTER SEEN INITIAL FORM + 0xfeb5: 0x00bd, # ARABIC LETTER SHEEN ISOLATED FORM + 0xfeb7: 0x00d4, # ARABIC LETTER SHEEN INITIAL FORM + 0xfeb9: 0x00be, # ARABIC LETTER SAD ISOLATED FORM + 0xfebb: 0x00d5, # ARABIC LETTER SAD INITIAL FORM + 0xfebd: 0x00eb, # ARABIC LETTER DAD ISOLATED FORM + 0xfebf: 0x00d6, # ARABIC LETTER DAD INITIAL FORM + 0xfec1: 0x00d7, # ARABIC LETTER TAH ISOLATED FORM + 0xfec5: 0x00d8, # ARABIC LETTER ZAH ISOLATED FORM + 0xfec9: 0x00df, # ARABIC LETTER AIN ISOLATED FORM + 0xfeca: 0x00c5, # ARABIC LETTER AIN FINAL FORM + 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM + 0xfecc: 0x00ec, # ARABIC LETTER AIN MEDIAL FORM + 0xfecd: 0x00ee, # ARABIC LETTER GHAIN ISOLATED FORM + 0xfece: 0x00ed, # ARABIC LETTER GHAIN FINAL FORM + 0xfecf: 0x00da, # ARABIC LETTER GHAIN INITIAL FORM + 0xfed0: 0x00f7, # ARABIC LETTER GHAIN MEDIAL FORM + 0xfed1: 0x00ba, # ARABIC LETTER FEH ISOLATED FORM + 0xfed3: 0x00e1, # ARABIC LETTER FEH INITIAL FORM + 0xfed5: 0x00f8, # ARABIC LETTER QAF ISOLATED FORM + 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM + 0xfed9: 0x00fc, # ARABIC LETTER KAF ISOLATED FORM + 0xfedb: 0x00e3, # ARABIC LETTER KAF INITIAL FORM + 0xfedd: 0x00fb, # ARABIC LETTER LAM ISOLATED FORM + 0xfedf: 0x00e4, # ARABIC LETTER LAM INITIAL FORM + 0xfee1: 0x00ef, # ARABIC LETTER MEEM ISOLATED FORM + 0xfee3: 0x00e5, # ARABIC LETTER MEEM INITIAL FORM + 0xfee5: 0x00f2, # ARABIC LETTER NOON ISOLATED FORM + 0xfee7: 0x00e6, # ARABIC LETTER NOON INITIAL FORM + 0xfee9: 0x00f3, # ARABIC LETTER HEH ISOLATED FORM + 0xfeeb: 0x00e7, # ARABIC LETTER HEH INITIAL FORM + 0xfeec: 0x00f4, # ARABIC LETTER HEH MEDIAL FORM + 0xfeed: 0x00e8, # ARABIC LETTER WAW ISOLATED FORM + 0xfeef: 0x00e9, # ARABIC 
LETTER ALEF MAKSURA ISOLATED FORM + 0xfef0: 0x00f5, # ARABIC LETTER ALEF MAKSURA FINAL FORM + 0xfef1: 0x00fd, # ARABIC LETTER YEH ISOLATED FORM + 0xfef2: 0x00f6, # ARABIC LETTER YEH FINAL FORM + 0xfef3: 0x00ea, # ARABIC LETTER YEH INITIAL FORM + 0xfef5: 0x00f9, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + 0xfef6: 0x00fa, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + 0xfef7: 0x0099, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + 0xfef8: 0x009a, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + 0xfefb: 0x009d, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + 0xfefc: 0x009e, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM +} Modified: python/branches/ssize_t/Lib/encodings/cp865.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp865.py (original) +++ python/branches/ssize_t/Lib/encodings/cp865.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH 
GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00a4, # CURRENCY SIGN - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL 
DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # 
BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # 
LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00a4, # CURRENCY SIGN + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, 
# BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) 
### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' 
# 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - 
u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING 
ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xa4' # 0x00af -> CURRENCY SIGN - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT 
SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN 
DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> 
END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xa4' # 0x00af -> CURRENCY SIGN + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> 
BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK 
SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 
0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN 
CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 
0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x00af, # CURRENCY SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 
0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS 
LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX 
DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, 
# PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE 
ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00af, # CURRENCY SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 
0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 
0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 
0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp866.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp866.py (original) +++ python/branches/ssize_t/Lib/encodings/cp866.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return 
codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a5: 0x0435, # CYRILLIC SMALL LETTER 
IE - 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O - 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E - 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 
0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x2116, # NUMERO SIGN - 0x00fd: 0x00a4, # CURRENCY SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU + 
0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O + 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND 
RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00ec: 0x044c, # CYRILLIC 
SMALL LETTER SOFT SIGN + 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E + 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x2116, # NUMERO SIGN + 0x00fd: 0x00a4, # CURRENCY SIGN + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 
0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL 
LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0x0088 -> CYRILLIC 
CAPITAL LETTER I - u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA - u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O - u'\u043f' # 0x00af -> CYRILLIC SMALL LETTER 
PE - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> 
BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E - u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU - u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA - u'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO - u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO - u'\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI - u'\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI - u'\u040e' # 0x00f6 -> CYRILLIC 
CAPITAL LETTER SHORT U - u'\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u2116' # 0x00fc -> NUMERO SIGN - u'\xa4' # 0x00fd -> CURRENCY SIGN - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x0091 -> CYRILLIC CAPITAL 
LETTER ES + u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O + u'\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN 
SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 
0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E + u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA + u'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO + u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO + u'\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI + u'\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI + u'\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u2116' # 0x00fc -> NUMERO SIGN + u'\xa4' # 0x00fd -> CURRENCY SIGN + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 
0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # 
GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 
0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a4: 0x00fd, # CURRENCY SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO - 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI - 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U - 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A - 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I - 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I - 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA - 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL - 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM - 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN - 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O - 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U - 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0x0095, 
# CYRILLIC CAPITAL LETTER HA - 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA - 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU - 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E - 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU - 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A - 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE - 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE - 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE - 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE - 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I - 0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I - 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA - 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL - 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM - 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN - 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O - 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE - 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER - 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES - 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE - 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U - 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF - 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA - 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE - 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA - 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN - 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU - 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E - 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU - 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA - 0x0451: 
0x00f1, # CYRILLIC SMALL LETTER IO - 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI - 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U - 0x2116: 0x00fc, # NUMERO SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT 
SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 
0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL 
LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00fd, # CURRENCY SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO + 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI 
+ 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U + 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I + 0x0439: 
0x00a9, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O + 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U + 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E + 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO + 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI + 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U + 0x2116: 0x00fc, # NUMERO SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 
0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp869.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp869.py (original) +++ python/branches/ssize_t/Lib/encodings/cp869.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,645 +32,645 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: None, # UNDEFINED - 0x0081: None, # UNDEFINED - 0x0082: None, # UNDEFINED - 0x0083: None, # UNDEFINED - 0x0084: None, # UNDEFINED - 0x0085: None, # UNDEFINED - 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0087: None, # UNDEFINED - 0x0088: 0x00b7, # MIDDLE DOT - 0x0089: 0x00ac, # NOT SIGN - 0x008a: 0x00a6, # BROKEN BAR - 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x008e: 0x2015, # HORIZONTAL BAR - 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x0093: None, # UNDEFINED - 0x0094: None, # UNDEFINED - 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x0097: 0x00a9, # COPYRIGHT SIGN - 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0099: 0x00b2, # SUPERSCRIPT TWO - 0x009a: 0x00b3, # SUPERSCRIPT THREE - 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 
0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU - 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI - 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA - 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00e6: 0x03bc, # GREEK SMALL LETTER MU - 0x00e7: 0x03bd, # GREEK SMALL LETTER NU - 0x00e8: 0x03be, # GREEK SMALL LETTER XI - 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00ea: 0x03c0, # GREEK SMALL LETTER PI - 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO - 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ef: 0x0384, # GREEK TONOS - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI 
- 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: None, # UNDEFINED + 0x0081: None, # UNDEFINED + 0x0082: None, # UNDEFINED + 0x0083: None, # UNDEFINED + 0x0084: None, # UNDEFINED + 0x0085: None, # UNDEFINED + 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0087: None, # UNDEFINED + 0x0088: 0x00b7, # MIDDLE DOT + 0x0089: 0x00ac, # NOT SIGN + 0x008a: 0x00a6, # BROKEN BAR + 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x008e: 0x2015, # HORIZONTAL BAR + 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x0093: None, # UNDEFINED + 0x0094: None, # UNDEFINED + 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x0097: 0x00a9, # COPYRIGHT SIGN + 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0099: 0x00b2, # SUPERSCRIPT TWO + 0x009a: 0x00b3, # SUPERSCRIPT THREE + 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00a2: 0x03cc, 
# GREEK SMALL LETTER OMICRON WITH TONOS + 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU + 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI + 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS 
DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA + 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA + 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00e6: 0x03bc, # GREEK SMALL LETTER MU + 0x00e7: 0x03bd, # GREEK SMALL LETTER NU + 0x00e8: 0x03be, # GREEK SMALL LETTER XI + 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00ea: 0x03c0, # GREEK SMALL LETTER PI + 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO + 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU + 0x00ef: 0x0384, # GREEK TONOS + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI + 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI + 0x00f7: 
0x0385, # GREEK DIALYTIKA TONOS + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' 
# 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\ufffe' # 0x0080 -> UNDEFINED - u'\ufffe' # 0x0081 -> UNDEFINED - u'\ufffe' # 0x0082 -> UNDEFINED - u'\ufffe' # 0x0083 -> UNDEFINED - u'\ufffe' # 0x0084 -> UNDEFINED - u'\ufffe' # 0x0085 -> UNDEFINED - u'\u0386' # 0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\ufffe' # 0x0087 -> UNDEFINED - u'\xb7' # 0x0088 -> MIDDLE DOT - u'\xac' # 0x0089 -> NOT SIGN - u'\xa6' # 0x008a -> BROKEN BAR - u'\u2018' # 0x008b -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x008c -> RIGHT SINGLE QUOTATION MARK - 
u'\u0388' # 0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u2015' # 0x008e -> HORIZONTAL BAR - u'\u0389' # 0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u03aa' # 0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u038c' # 0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\ufffe' # 0x0093 -> UNDEFINED - u'\ufffe' # 0x0094 -> UNDEFINED - u'\u038e' # 0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u03ab' # 0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\xa9' # 0x0097 -> COPYRIGHT SIGN - u'\u038f' # 0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\xb2' # 0x0099 -> SUPERSCRIPT TWO - u'\xb3' # 0x009a -> SUPERSCRIPT THREE - u'\u03ac' # 0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\xa3' # 0x009c -> POUND SIGN - u'\u03ad' # 0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0x009e -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0x009f -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03ca' # 0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u0390' # 0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u03cc' # 0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u0391' # 0x00a4 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0x00a5 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0x00a6 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0x00a7 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0x00a8 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0x00a9 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0x00aa -> GREEK CAPITAL LETTER ETA - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\u0398' # 0x00ac -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0x00ad -> GREEK CAPITAL LETTER IOTA - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE 
- u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u039a' # 0x00b5 -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0x00b6 -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0x00b7 -> GREEK CAPITAL LETTER MU - u'\u039d' # 0x00b8 -> GREEK CAPITAL LETTER NU - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u039e' # 0x00bd -> GREEK CAPITAL LETTER XI - u'\u039f' # 0x00be -> GREEK CAPITAL LETTER OMICRON - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u03a0' # 0x00c6 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0x00c7 -> GREEK CAPITAL LETTER RHO - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u03a3' # 0x00cf -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0x00d0 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0x00d1 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0x00d2 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0x00d3 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0x00d4 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0x00d5 -> GREEK CAPITAL LETTER OMEGA - u'\u03b1' # 0x00d6 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 
0x00d7 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0x00d8 -> GREEK SMALL LETTER GAMMA - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u03b4' # 0x00dd -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0x00de -> GREEK SMALL LETTER EPSILON - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b6' # 0x00e0 -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0x00e1 -> GREEK SMALL LETTER ETA - u'\u03b8' # 0x00e2 -> GREEK SMALL LETTER THETA - u'\u03b9' # 0x00e3 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0x00e4 -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0x00e5 -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0x00e6 -> GREEK SMALL LETTER MU - u'\u03bd' # 0x00e7 -> GREEK SMALL LETTER NU - u'\u03be' # 0x00e8 -> GREEK SMALL LETTER XI - u'\u03bf' # 0x00e9 -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0x00ea -> GREEK SMALL LETTER PI - u'\u03c1' # 0x00eb -> GREEK SMALL LETTER RHO - u'\u03c3' # 0x00ec -> GREEK SMALL LETTER SIGMA - u'\u03c2' # 0x00ed -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c4' # 0x00ee -> GREEK SMALL LETTER TAU - u'\u0384' # 0x00ef -> GREEK TONOS - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u03c5' # 0x00f2 -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0x00f3 -> GREEK SMALL LETTER PHI - u'\u03c7' # 0x00f4 -> GREEK SMALL LETTER CHI - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\u03c8' # 0x00f6 -> GREEK SMALL LETTER PSI - u'\u0385' # 0x00f7 -> GREEK DIALYTIKA TONOS - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\u03c9' # 0x00fa -> GREEK SMALL LETTER OMEGA - u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03b0' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u03ce' # 0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> 
START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\ufffe' # 0x0080 -> UNDEFINED + u'\ufffe' # 0x0081 -> UNDEFINED + u'\ufffe' # 0x0082 -> UNDEFINED + u'\ufffe' # 0x0083 -> UNDEFINED + u'\ufffe' # 0x0084 -> UNDEFINED + u'\ufffe' # 0x0085 -> UNDEFINED + u'\u0386' # 0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\ufffe' # 0x0087 -> UNDEFINED + u'\xb7' # 0x0088 -> MIDDLE DOT + u'\xac' # 0x0089 -> NOT SIGN + u'\xa6' # 0x008a -> BROKEN BAR + u'\u2018' # 0x008b -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x008c -> RIGHT SINGLE QUOTATION MARK + u'\u0388' # 0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u2015' # 0x008e -> HORIZONTAL BAR + u'\u0389' # 0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u03aa' # 0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u038c' # 0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\ufffe' # 0x0093 -> UNDEFINED + u'\ufffe' # 0x0094 -> 
UNDEFINED + u'\u038e' # 0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u03ab' # 0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\xa9' # 0x0097 -> COPYRIGHT SIGN + u'\u038f' # 0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\xb2' # 0x0099 -> SUPERSCRIPT TWO + u'\xb3' # 0x009a -> SUPERSCRIPT THREE + u'\u03ac' # 0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\xa3' # 0x009c -> POUND SIGN + u'\u03ad' # 0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x009e -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0x009f -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03ca' # 0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u0390' # 0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03cc' # 0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u0391' # 0x00a4 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x00a5 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x00a6 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x00a7 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x00a8 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x00a9 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x00aa -> GREEK CAPITAL LETTER ETA + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\u0398' # 0x00ac -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x00ad -> GREEK CAPITAL LETTER IOTA + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u039a' # 0x00b5 -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x00b6 -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x00b7 -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x00b8 -> GREEK CAPITAL LETTER NU + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX 
DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u039e' # 0x00bd -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x00be -> GREEK CAPITAL LETTER OMICRON + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u03a0' # 0x00c6 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x00c7 -> GREEK CAPITAL LETTER RHO + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u03a3' # 0x00cf -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x00d0 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x00d1 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x00d2 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x00d3 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x00d4 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x00d5 -> GREEK CAPITAL LETTER OMEGA + u'\u03b1' # 0x00d6 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x00d7 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x00d8 -> GREEK SMALL LETTER GAMMA + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u03b4' # 0x00dd -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x00de -> GREEK SMALL LETTER EPSILON + u'\u2580' # 0x00df -> 
UPPER HALF BLOCK + u'\u03b6' # 0x00e0 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x00e1 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x00e2 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00e3 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00e4 -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00e5 -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00e6 -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00e7 -> GREEK SMALL LETTER NU + u'\u03be' # 0x00e8 -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00e9 -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00ea -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00eb -> GREEK SMALL LETTER RHO + u'\u03c3' # 0x00ec -> GREEK SMALL LETTER SIGMA + u'\u03c2' # 0x00ed -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c4' # 0x00ee -> GREEK SMALL LETTER TAU + u'\u0384' # 0x00ef -> GREEK TONOS + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u03c5' # 0x00f2 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00f3 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00f4 -> GREEK SMALL LETTER CHI + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\u03c8' # 0x00f6 -> GREEK SMALL LETTER PSI + u'\u0385' # 0x00f7 -> GREEK DIALYTIKA TONOS + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\u03c9' # 0x00fa -> GREEK SMALL LETTER OMEGA + u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03b0' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03ce' # 0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # 
CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL 
LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 
0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a3: 0x009c, # POUND SIGN - 0x00a6: 0x008a, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00a9: 0x0097, # COPYRIGHT SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x0089, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x0099, # SUPERSCRIPT TWO - 0x00b3: 0x009a, # SUPERSCRIPT THREE - 0x00b7: 0x0088, # MIDDLE DOT - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x0384: 0x00ef, # GREEK TONOS - 0x0385: 0x00f7, # GREEK DIALYTIKA TONOS - 0x0386: 0x0086, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0x008d, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0x008f, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038a: 0x0090, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038c: 0x0092, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038e: 0x0095, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038f: 0x0098, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0x00a1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0x00a4, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0x00a5, # GREEK CAPITAL LETTER BETA - 0x0393: 0x00a6, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0x00a7, # GREEK CAPITAL LETTER DELTA - 0x0395: 0x00a8, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0x00a9, # GREEK CAPITAL LETTER ZETA - 0x0397: 0x00aa, # GREEK CAPITAL LETTER ETA - 0x0398: 0x00ac, # GREEK CAPITAL LETTER THETA - 0x0399: 0x00ad, # GREEK CAPITAL LETTER IOTA - 0x039a: 0x00b5, # GREEK CAPITAL LETTER KAPPA - 0x039b: 0x00b6, # GREEK CAPITAL LETTER LAMDA - 0x039c: 0x00b7, # GREEK CAPITAL LETTER MU - 0x039d: 
0x00b8, # GREEK CAPITAL LETTER NU - 0x039e: 0x00bd, # GREEK CAPITAL LETTER XI - 0x039f: 0x00be, # GREEK CAPITAL LETTER OMICRON - 0x03a0: 0x00c6, # GREEK CAPITAL LETTER PI - 0x03a1: 0x00c7, # GREEK CAPITAL LETTER RHO - 0x03a3: 0x00cf, # GREEK CAPITAL LETTER SIGMA - 0x03a4: 0x00d0, # GREEK CAPITAL LETTER TAU - 0x03a5: 0x00d1, # GREEK CAPITAL LETTER UPSILON - 0x03a6: 0x00d2, # GREEK CAPITAL LETTER PHI - 0x03a7: 0x00d3, # GREEK CAPITAL LETTER CHI - 0x03a8: 0x00d4, # GREEK CAPITAL LETTER PSI - 0x03a9: 0x00d5, # GREEK CAPITAL LETTER OMEGA - 0x03aa: 0x0091, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03ab: 0x0096, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03ac: 0x009b, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03ad: 0x009d, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03ae: 0x009e, # GREEK SMALL LETTER ETA WITH TONOS - 0x03af: 0x009f, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03b0: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03b1: 0x00d6, # GREEK SMALL LETTER ALPHA - 0x03b2: 0x00d7, # GREEK SMALL LETTER BETA - 0x03b3: 0x00d8, # GREEK SMALL LETTER GAMMA - 0x03b4: 0x00dd, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00de, # GREEK SMALL LETTER EPSILON - 0x03b6: 0x00e0, # GREEK SMALL LETTER ZETA - 0x03b7: 0x00e1, # GREEK SMALL LETTER ETA - 0x03b8: 0x00e2, # GREEK SMALL LETTER THETA - 0x03b9: 0x00e3, # GREEK SMALL LETTER IOTA - 0x03ba: 0x00e4, # GREEK SMALL LETTER KAPPA - 0x03bb: 0x00e5, # GREEK SMALL LETTER LAMDA - 0x03bc: 0x00e6, # GREEK SMALL LETTER MU - 0x03bd: 0x00e7, # GREEK SMALL LETTER NU - 0x03be: 0x00e8, # GREEK SMALL LETTER XI - 0x03bf: 0x00e9, # GREEK SMALL LETTER OMICRON - 0x03c0: 0x00ea, # GREEK SMALL LETTER PI - 0x03c1: 0x00eb, # GREEK SMALL LETTER RHO - 0x03c2: 0x00ed, # GREEK SMALL LETTER FINAL SIGMA - 0x03c3: 0x00ec, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00ee, # GREEK SMALL LETTER TAU - 0x03c5: 0x00f2, # GREEK SMALL LETTER UPSILON - 0x03c6: 0x00f3, # GREEK SMALL LETTER PHI - 0x03c7: 0x00f4, # GREEK SMALL LETTER CHI - 0x03c8: 0x00f6, 
# GREEK SMALL LETTER PSI - 0x03c9: 0x00fa, # GREEK SMALL LETTER OMEGA - 0x03ca: 0x00a0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03cc: 0x00a2, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03cd: 0x00a3, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03ce: 0x00fd, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2015: 0x008e, # HORIZONTAL BAR - 0x2018: 0x008b, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x008c, # RIGHT SINGLE QUOTATION MARK - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} \ No newline at end of file + 0x0000: 0x0000, # NULL + 
0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 
0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 
0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a3: 0x009c, # POUND SIGN + 0x00a6: 0x008a, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x0097, # COPYRIGHT SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x0089, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x0099, # SUPERSCRIPT TWO + 0x00b3: 0x009a, # SUPERSCRIPT THREE + 0x00b7: 0x0088, # MIDDLE DOT + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x0384: 0x00ef, # GREEK TONOS + 0x0385: 0x00f7, # GREEK DIALYTIKA TONOS + 0x0386: 0x0086, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x008d, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x008f, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x0090, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x0092, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x0095, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x0098, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0x00a1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0x00a4, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x00a5, # GREEK CAPITAL LETTER BETA + 0x0393: 0x00a6, # GREEK CAPITAL LETTER GAMMA + 0x0394: 
0x00a7, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x00a8, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x00a9, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x00aa, # GREEK CAPITAL LETTER ETA + 0x0398: 0x00ac, # GREEK CAPITAL LETTER THETA + 0x0399: 0x00ad, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x00b5, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x00b6, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x00b7, # GREEK CAPITAL LETTER MU + 0x039d: 0x00b8, # GREEK CAPITAL LETTER NU + 0x039e: 0x00bd, # GREEK CAPITAL LETTER XI + 0x039f: 0x00be, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x00c6, # GREEK CAPITAL LETTER PI + 0x03a1: 0x00c7, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x00cf, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x00d0, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x00d1, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x00d2, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x00d3, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x00d4, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x00d5, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x0091, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x0096, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x009b, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x009d, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x009e, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x009f, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b0: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03b1: 0x00d6, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x00d7, # GREEK SMALL LETTER BETA + 0x03b3: 0x00d8, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x00dd, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00de, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x00e0, # GREEK SMALL LETTER ZETA + 0x03b7: 0x00e1, # GREEK SMALL LETTER ETA + 0x03b8: 0x00e2, # GREEK SMALL LETTER THETA + 0x03b9: 0x00e3, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00e4, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00e5, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00e6, # GREEK SMALL LETTER MU + 0x03bd: 0x00e7, # GREEK SMALL LETTER NU + 0x03be: 0x00e8, # GREEK SMALL LETTER XI + 
0x03bf: 0x00e9, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00ea, # GREEK SMALL LETTER PI + 0x03c1: 0x00eb, # GREEK SMALL LETTER RHO + 0x03c2: 0x00ed, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00ec, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00ee, # GREEK SMALL LETTER TAU + 0x03c5: 0x00f2, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00f3, # GREEK SMALL LETTER PHI + 0x03c7: 0x00f4, # GREEK SMALL LETTER CHI + 0x03c8: 0x00f6, # GREEK SMALL LETTER PSI + 0x03c9: 0x00fa, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00a0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00a2, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00a3, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00fd, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0x008e, # HORIZONTAL BAR + 0x2018: 0x008b, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x008c, # RIGHT SINGLE QUOTATION MARK + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND 
HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} Modified: python/branches/ssize_t/Lib/encodings/cp874.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp874.py (original) +++ python/branches/ssize_t/Lib/encodings/cp874.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,491 +32,490 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - 
u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET 
- u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\ufffe' # 0x82 -> UNDEFINED - u'\ufffe' # 0x83 -> UNDEFINED - u'\ufffe' # 0x84 -> UNDEFINED - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\ufffe' # 0x86 -> UNDEFINED - u'\ufffe' # 0x87 -> UNDEFINED - u'\ufffe' # 0x88 -> UNDEFINED - u'\ufffe' # 0x89 -> UNDEFINED - u'\ufffe' # 0x8A -> UNDEFINED - u'\ufffe' # 0x8B -> UNDEFINED - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE 
QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\ufffe' # 0x99 -> UNDEFINED - u'\ufffe' # 0x9A -> UNDEFINED - u'\ufffe' # 0x9B -> UNDEFINED - u'\ufffe' # 0x9C -> UNDEFINED - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING - u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO - u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING - u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN - u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK - u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA - u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN - u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA - u'\u0e22' # 0xC2 -> THAI CHARACTER 
YO YAK - u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xC4 -> THAI CHARACTER RU - u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xC6 -> THAI CHARACTER LU - u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA - u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG - u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II - u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU - u'\ufffe' # 0xDB -> UNDEFINED - u'\ufffe' # 0xDC -> UNDEFINED - u'\ufffe' # 0xDD -> UNDEFINED - u'\ufffe' # 0xDE -> UNDEFINED - u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE - u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN - u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO - u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN - u'\u0e4f' # 0xEF -> THAI CHARACTER 
FONGMAN - u'\u0e50' # 0xF0 -> THAI DIGIT ZERO - u'\u0e51' # 0xF1 -> THAI DIGIT ONE - u'\u0e52' # 0xF2 -> THAI DIGIT TWO - u'\u0e53' # 0xF3 -> THAI DIGIT THREE - u'\u0e54' # 0xF4 -> THAI DIGIT FOUR - u'\u0e55' # 0xF5 -> THAI DIGIT FIVE - u'\u0e56' # 0xF6 -> THAI DIGIT SIX - u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xF9 -> THAI DIGIT NINE - u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT - u'\ufffe' # 0xFC -> UNDEFINED - u'\ufffe' # 0xFD -> UNDEFINED - u'\ufffe' # 0xFE -> UNDEFINED - u'\ufffe' # 0xFF -> UNDEFINED + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' 
# 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + 
u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\ufffe' # 0x82 -> UNDEFINED + u'\ufffe' # 0x83 -> UNDEFINED + u'\ufffe' # 0x84 -> UNDEFINED + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\ufffe' # 0x86 -> UNDEFINED + u'\ufffe' # 0x87 -> UNDEFINED + u'\ufffe' # 0x88 -> UNDEFINED + u'\ufffe' # 0x89 -> UNDEFINED + u'\ufffe' # 0x8A -> UNDEFINED + u'\ufffe' # 0x8B -> UNDEFINED + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION 
MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\ufffe' # 0x98 -> UNDEFINED + u'\ufffe' # 0x99 -> UNDEFINED + u'\ufffe' # 0x9A -> UNDEFINED + u'\ufffe' # 0x9B -> UNDEFINED + u'\ufffe' # 0x9C -> UNDEFINED + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO + u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING + u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK + u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA + u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA + u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN + u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA + u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK + 
u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0xC4 -> THAI CHARACTER RU + u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING + u'\u0e26' # 0xC6 -> THAI CHARACTER LU + u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA + u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP + u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG + u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A + u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I + u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II + u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U + u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU + u'\ufffe' # 0xDB -> UNDEFINED + u'\ufffe' # 0xDC -> UNDEFINED + u'\ufffe' # 0xDD -> UNDEFINED + u'\ufffe' # 0xDE -> UNDEFINED + u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E + u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O + u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN 
+ u'\u0e50' # 0xF0 -> THAI DIGIT ZERO + u'\u0e51' # 0xF1 -> THAI DIGIT ONE + u'\u0e52' # 0xF2 -> THAI DIGIT TWO + u'\u0e53' # 0xF3 -> THAI DIGIT THREE + u'\u0e54' # 0xF4 -> THAI DIGIT FOUR + u'\u0e55' # 0xF5 -> THAI DIGIT FIVE + u'\u0e56' # 0xF6 -> THAI DIGIT SIX + u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN + u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT + u'\u0e59' # 0xF9 -> THAI DIGIT NINE + u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT + u'\ufffe' # 0xFC -> UNDEFINED + u'\ufffe' # 0xFD -> UNDEFINED + u'\ufffe' # 0xFE -> UNDEFINED + u'\ufffe' # 0xFF -> UNDEFINED ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 
0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL 
LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x0E01: 0xA1, # THAI CHARACTER KO KAI - 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI - 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT - 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI - 0x0E05: 0xA5, # THAI CHARACTER KHO KHON - 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG - 0x0E07: 0xA7, # THAI CHARACTER NGO NGU - 0x0E08: 0xA8, # THAI CHARACTER CHO CHAN - 0x0E09: 0xA9, # THAI CHARACTER CHO CHING - 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG - 0x0E0B: 0xAB, # THAI CHARACTER SO SO - 0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE - 0x0E0D: 0xAD, # THAI CHARACTER YO YING - 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA - 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK - 0x0E10: 0xB0, # THAI CHARACTER THO THAN - 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO - 0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO - 0x0E13: 0xB3, # THAI CHARACTER NO NEN - 0x0E14: 0xB4, # THAI CHARACTER DO DEK - 0x0E15: 0xB5, # 
THAI CHARACTER TO TAO - 0x0E16: 0xB6, # THAI CHARACTER THO THUNG - 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN - 0x0E18: 0xB8, # THAI CHARACTER THO THONG - 0x0E19: 0xB9, # THAI CHARACTER NO NU - 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI - 0x0E1B: 0xBB, # THAI CHARACTER PO PLA - 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG - 0x0E1D: 0xBD, # THAI CHARACTER FO FA - 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN - 0x0E1F: 0xBF, # THAI CHARACTER FO FAN - 0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO - 0x0E21: 0xC1, # THAI CHARACTER MO MA - 0x0E22: 0xC2, # THAI CHARACTER YO YAK - 0x0E23: 0xC3, # THAI CHARACTER RO RUA - 0x0E24: 0xC4, # THAI CHARACTER RU - 0x0E25: 0xC5, # THAI CHARACTER LO LING - 0x0E26: 0xC6, # THAI CHARACTER LU - 0x0E27: 0xC7, # THAI CHARACTER WO WAEN - 0x0E28: 0xC8, # THAI CHARACTER SO SALA - 0x0E29: 0xC9, # THAI CHARACTER SO RUSI - 0x0E2A: 0xCA, # THAI CHARACTER SO SUA - 0x0E2B: 0xCB, # THAI CHARACTER HO HIP - 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA - 0x0E2D: 0xCD, # THAI CHARACTER O ANG - 0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK - 0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI - 0x0E30: 0xD0, # THAI CHARACTER SARA A - 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT - 0x0E32: 0xD2, # THAI CHARACTER SARA AA - 0x0E33: 0xD3, # THAI CHARACTER SARA AM - 0x0E34: 0xD4, # THAI CHARACTER SARA I - 0x0E35: 0xD5, # THAI CHARACTER SARA II - 0x0E36: 0xD6, # THAI CHARACTER SARA UE - 0x0E37: 0xD7, # THAI CHARACTER SARA UEE - 0x0E38: 0xD8, # THAI CHARACTER SARA U - 0x0E39: 0xD9, # THAI CHARACTER SARA UU - 0x0E3A: 0xDA, # THAI CHARACTER PHINTHU - 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT - 0x0E40: 0xE0, # THAI CHARACTER SARA E - 0x0E41: 0xE1, # THAI CHARACTER SARA AE - 0x0E42: 0xE2, # THAI CHARACTER SARA O - 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN - 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI - 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO - 0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK - 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU - 0x0E48: 0xE8, # THAI CHARACTER MAI EK - 0x0E49: 0xE9, # THAI 
CHARACTER MAI THO - 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI - 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA - 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT - 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT - 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN - 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN - 0x0E50: 0xF0, # THAI DIGIT ZERO - 0x0E51: 0xF1, # THAI DIGIT ONE - 0x0E52: 0xF2, # THAI DIGIT TWO - 0x0E53: 0xF3, # THAI DIGIT THREE - 0x0E54: 0xF4, # THAI DIGIT FOUR - 0x0E55: 0xF5, # THAI DIGIT FIVE - 0x0E56: 0xF6, # THAI DIGIT SIX - 0x0E57: 0xF7, # THAI DIGIT SEVEN - 0x0E58: 0xF8, # THAI DIGIT EIGHT - 0x0E59: 0xF9, # THAI DIGIT NINE - 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU - 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x20AC: 0x80, # EURO SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 
0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 
0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x0E01: 0xA1, # THAI CHARACTER KO KAI + 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI + 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT + 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI + 0x0E05: 0xA5, # THAI CHARACTER KHO KHON + 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG + 0x0E07: 0xA7, # THAI CHARACTER NGO NGU + 0x0E08: 0xA8, # THAI CHARACTER CHO CHAN + 0x0E09: 0xA9, # THAI CHARACTER CHO CHING + 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG + 0x0E0B: 0xAB, # THAI CHARACTER SO SO + 0x0E0C: 0xAC, 
# THAI CHARACTER CHO CHOE + 0x0E0D: 0xAD, # THAI CHARACTER YO YING + 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA + 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK + 0x0E10: 0xB0, # THAI CHARACTER THO THAN + 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO + 0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO + 0x0E13: 0xB3, # THAI CHARACTER NO NEN + 0x0E14: 0xB4, # THAI CHARACTER DO DEK + 0x0E15: 0xB5, # THAI CHARACTER TO TAO + 0x0E16: 0xB6, # THAI CHARACTER THO THUNG + 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN + 0x0E18: 0xB8, # THAI CHARACTER THO THONG + 0x0E19: 0xB9, # THAI CHARACTER NO NU + 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI + 0x0E1B: 0xBB, # THAI CHARACTER PO PLA + 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG + 0x0E1D: 0xBD, # THAI CHARACTER FO FA + 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN + 0x0E1F: 0xBF, # THAI CHARACTER FO FAN + 0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO + 0x0E21: 0xC1, # THAI CHARACTER MO MA + 0x0E22: 0xC2, # THAI CHARACTER YO YAK + 0x0E23: 0xC3, # THAI CHARACTER RO RUA + 0x0E24: 0xC4, # THAI CHARACTER RU + 0x0E25: 0xC5, # THAI CHARACTER LO LING + 0x0E26: 0xC6, # THAI CHARACTER LU + 0x0E27: 0xC7, # THAI CHARACTER WO WAEN + 0x0E28: 0xC8, # THAI CHARACTER SO SALA + 0x0E29: 0xC9, # THAI CHARACTER SO RUSI + 0x0E2A: 0xCA, # THAI CHARACTER SO SUA + 0x0E2B: 0xCB, # THAI CHARACTER HO HIP + 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA + 0x0E2D: 0xCD, # THAI CHARACTER O ANG + 0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK + 0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI + 0x0E30: 0xD0, # THAI CHARACTER SARA A + 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT + 0x0E32: 0xD2, # THAI CHARACTER SARA AA + 0x0E33: 0xD3, # THAI CHARACTER SARA AM + 0x0E34: 0xD4, # THAI CHARACTER SARA I + 0x0E35: 0xD5, # THAI CHARACTER SARA II + 0x0E36: 0xD6, # THAI CHARACTER SARA UE + 0x0E37: 0xD7, # THAI CHARACTER SARA UEE + 0x0E38: 0xD8, # THAI CHARACTER SARA U + 0x0E39: 0xD9, # THAI CHARACTER SARA UU + 0x0E3A: 0xDA, # THAI CHARACTER PHINTHU + 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT + 0x0E40: 0xE0, # THAI CHARACTER 
SARA E + 0x0E41: 0xE1, # THAI CHARACTER SARA AE + 0x0E42: 0xE2, # THAI CHARACTER SARA O + 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN + 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI + 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO + 0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK + 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU + 0x0E48: 0xE8, # THAI CHARACTER MAI EK + 0x0E49: 0xE9, # THAI CHARACTER MAI THO + 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI + 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA + 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT + 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT + 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN + 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN + 0x0E50: 0xF0, # THAI DIGIT ZERO + 0x0E51: 0xF1, # THAI DIGIT ONE + 0x0E52: 0xF2, # THAI DIGIT TWO + 0x0E53: 0xF3, # THAI DIGIT THREE + 0x0E54: 0xF4, # THAI DIGIT FOUR + 0x0E55: 0xF5, # THAI DIGIT FIVE + 0x0E56: 0xF6, # THAI DIGIT SIX + 0x0E57: 0xF7, # THAI DIGIT SEVEN + 0x0E58: 0xF8, # THAI DIGIT EIGHT + 0x0E59: 0xF9, # THAI DIGIT NINE + 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU + 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x20AC: 0x80, # EURO SIGN } - Modified: python/branches/ssize_t/Lib/encodings/cp875.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/cp875.py (original) +++ python/branches/ssize_t/Lib/encodings/cp875.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,516 +32,515 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> 
START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\u0391' # 0x41 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0x42 
-> GREEK CAPITAL LETTER BETA - u'\u0393' # 0x43 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0x44 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0x45 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0x46 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0x47 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0x48 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0x49 -> GREEK CAPITAL LETTER IOTA - u'[' # 0x4A -> LEFT SQUARE BRACKET - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'!' # 0x4F -> EXCLAMATION MARK - u'&' # 0x50 -> AMPERSAND - u'\u039a' # 0x51 -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0x52 -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0x53 -> GREEK CAPITAL LETTER MU - u'\u039d' # 0x54 -> GREEK CAPITAL LETTER NU - u'\u039e' # 0x55 -> GREEK CAPITAL LETTER XI - u'\u039f' # 0x56 -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0x57 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0x58 -> GREEK CAPITAL LETTER RHO - u'\u03a3' # 0x59 -> GREEK CAPITAL LETTER SIGMA - u']' # 0x5A -> RIGHT SQUARE BRACKET - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'^' # 0x5F -> CIRCUMFLEX ACCENT - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\u03a4' # 0x62 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0x63 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0x64 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0x65 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0x66 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0x67 -> GREEK CAPITAL LETTER OMEGA - u'\u03aa' # 0x68 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0x69 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'|' # 0x6A -> VERTICAL LINE - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' 
# 0x6F -> QUESTION MARK - u'\xa8' # 0x70 -> DIAERESIS - u'\u0386' # 0x71 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\u0388' # 0x72 -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0x73 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\xa0' # 0x74 -> NO-BREAK SPACE - u'\u038a' # 0x75 -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u038c' # 0x76 -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\u038e' # 0x77 -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0x78 -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\u0385' # 0x80 -> GREEK DIALYTIKA TONOS - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\u03b1' # 0x8A -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0x8B -> GREEK SMALL LETTER BETA - u'\u03b3' # 0x8C -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0x8D -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0x8E -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0x8F -> GREEK SMALL LETTER ZETA - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\u03b7' # 0x9A -> GREEK SMALL LETTER ETA - u'\u03b8' # 0x9B -> GREEK SMALL LETTER THETA - u'\u03b9' # 0x9C -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0x9D -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0x9E -> GREEK SMALL LETTER LAMDA - u'\u03bc' 
# 0x9F -> GREEK SMALL LETTER MU - u'\xb4' # 0xA0 -> ACUTE ACCENT - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\u03bd' # 0xAA -> GREEK SMALL LETTER NU - u'\u03be' # 0xAB -> GREEK SMALL LETTER XI - u'\u03bf' # 0xAC -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xAD -> GREEK SMALL LETTER PI - u'\u03c1' # 0xAE -> GREEK SMALL LETTER RHO - u'\u03c3' # 0xAF -> GREEK SMALL LETTER SIGMA - u'\xa3' # 0xB0 -> POUND SIGN - u'\u03ac' # 0xB1 -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xB2 -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xB3 -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03ca' # 0xB4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03af' # 0xB5 -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03cc' # 0xB6 -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xB7 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03cb' # 0xB8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03ce' # 0xB9 -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u03c2' # 0xBA -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c4' # 0xBB -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xBC -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xBD -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xBE -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xBF -> GREEK SMALL LETTER PSI - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\u03c9' # 0xCB -> GREEK SMALL LETTER OMEGA - u'\u0390' 
# 0xCC -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u03b0' # 0xCD -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u2018' # 0xCE -> LEFT SINGLE QUOTATION MARK - u'\u2015' # 0xCF -> HORIZONTAL BAR - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb1' # 0xDA -> PLUS-MINUS SIGN - u'\xbd' # 0xDB -> VULGAR FRACTION ONE HALF - u'\x1a' # 0xDC -> SUBSTITUTE - u'\u0387' # 0xDD -> GREEK ANO TELEIA - u'\u2019' # 0xDE -> RIGHT SINGLE QUOTATION MARK - u'\xa6' # 0xDF -> BROKEN BAR - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\x1a' # 0xE1 -> SUBSTITUTE - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xa7' # 0xEB -> SECTION SIGN - u'\x1a' # 0xEC -> SUBSTITUTE - u'\x1a' # 0xED -> SUBSTITUTE - u'\xab' # 0xEE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xEF -> NOT SIGN - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xa9' # 0xFB -> COPYRIGHT SIGN - u'\x1a' # 0xFC -> SUBSTITUTE - u'\x1a' # 0xFD -> SUBSTITUTE - u'\xbb' # 0xFE -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\x9f' # 0xFF -> CONTROL + u'\x00' # 0x00 -> NULL + u'\x01' # 
0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> CONTROL + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> CONTROL + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> CONTROL + u'\x8d' # 0x09 -> CONTROL + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> CONTROL + u'\x85' # 0x15 -> CONTROL + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> CONTROL + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> CONTROL + u'\x81' # 0x21 -> CONTROL + u'\x82' # 0x22 -> CONTROL + u'\x83' # 0x23 -> CONTROL + u'\x84' # 0x24 -> CONTROL + u'\n' # 0x25 -> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> CONTROL + u'\x89' # 0x29 -> CONTROL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> CONTROL + u'\x91' # 0x31 -> CONTROL + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> CONTROL + u'\x94' # 0x34 -> CONTROL + u'\x95' # 0x35 -> CONTROL + u'\x96' # 0x36 -> CONTROL + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> CONTROL + u'\x99' # 0x39 -> CONTROL + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\u0391' # 0x41 -> 
GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x42 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x43 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x44 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x45 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x46 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x47 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0x48 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x49 -> GREEK CAPITAL LETTER IOTA + u'[' # 0x4A -> LEFT SQUARE BRACKET + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'!' # 0x4F -> EXCLAMATION MARK + u'&' # 0x50 -> AMPERSAND + u'\u039a' # 0x51 -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x52 -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x53 -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x54 -> GREEK CAPITAL LETTER NU + u'\u039e' # 0x55 -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x56 -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0x57 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x58 -> GREEK CAPITAL LETTER RHO + u'\u03a3' # 0x59 -> GREEK CAPITAL LETTER SIGMA + u']' # 0x5A -> RIGHT SQUARE BRACKET + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'^' # 0x5F -> CIRCUMFLEX ACCENT + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\u03a4' # 0x62 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x63 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x64 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x65 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x66 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x67 -> GREEK CAPITAL LETTER OMEGA + u'\u03aa' # 0x68 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0x69 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'|' # 0x6A -> VERTICAL LINE + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' 
# 0x6F -> QUESTION MARK + u'\xa8' # 0x70 -> DIAERESIS + u'\u0386' # 0x71 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\u0388' # 0x72 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0x73 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\xa0' # 0x74 -> NO-BREAK SPACE + u'\u038a' # 0x75 -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u038c' # 0x76 -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\u038e' # 0x77 -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0x78 -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'`' # 0x79 -> GRAVE ACCENT + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK + u'\u0385' # 0x80 -> GREEK DIALYTIKA TONOS + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\u03b1' # 0x8A -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x8B -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x8C -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0x8D -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x8E -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0x8F -> GREEK SMALL LETTER ZETA + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\u03b7' # 0x9A -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x9B -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x9C -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x9D -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x9E -> GREEK SMALL LETTER LAMDA + u'\u03bc' 
# 0x9F -> GREEK SMALL LETTER MU + u'\xb4' # 0xA0 -> ACUTE ACCENT + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\u03bd' # 0xAA -> GREEK SMALL LETTER NU + u'\u03be' # 0xAB -> GREEK SMALL LETTER XI + u'\u03bf' # 0xAC -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xAD -> GREEK SMALL LETTER PI + u'\u03c1' # 0xAE -> GREEK SMALL LETTER RHO + u'\u03c3' # 0xAF -> GREEK SMALL LETTER SIGMA + u'\xa3' # 0xB0 -> POUND SIGN + u'\u03ac' # 0xB1 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0xB2 -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xB3 -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03ca' # 0xB4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03af' # 0xB5 -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0xB6 -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0xB7 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03cb' # 0xB8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03ce' # 0xB9 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u03c2' # 0xBA -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c4' # 0xBB -> GREEK SMALL LETTER TAU + u'\u03c5' # 0xBC -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0xBD -> GREEK SMALL LETTER PHI + u'\u03c7' # 0xBE -> GREEK SMALL LETTER CHI + u'\u03c8' # 0xBF -> GREEK SMALL LETTER PSI + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\u03c9' # 0xCB -> GREEK SMALL LETTER OMEGA + u'\u0390' 
# 0xCC -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03b0' # 0xCD -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u2018' # 0xCE -> LEFT SINGLE QUOTATION MARK + u'\u2015' # 0xCF -> HORIZONTAL BAR + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb1' # 0xDA -> PLUS-MINUS SIGN + u'\xbd' # 0xDB -> VULGAR FRACTION ONE HALF + u'\x1a' # 0xDC -> SUBSTITUTE + u'\u0387' # 0xDD -> GREEK ANO TELEIA + u'\u2019' # 0xDE -> RIGHT SINGLE QUOTATION MARK + u'\xa6' # 0xDF -> BROKEN BAR + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\x1a' # 0xE1 -> SUBSTITUTE + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xa7' # 0xEB -> SECTION SIGN + u'\x1a' # 0xEC -> SUBSTITUTE + u'\x1a' # 0xED -> SUBSTITUTE + u'\xab' # 0xEE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xEF -> NOT SIGN + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xa9' # 0xFB -> COPYRIGHT SIGN + u'\x1a' # 0xFC -> SUBSTITUTE + u'\x1a' # 0xFD -> SUBSTITUTE + u'\xbb' # 0xFE -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\x9f' # 0xFF -> CONTROL ) ### Encoding Map encoding_map = { - 
0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: None, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x4F, # EXCLAMATION MARK - 0x0022: 0x7F, # QUOTATION MARK - 0x0023: 0x7B, # NUMBER SIGN - 0x0024: 0x5B, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, # QUESTION MARK - 0x0040: 0x7C, 
# COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0x4A, # LEFT SQUARE BRACKET - 0x005C: 0xE0, # REVERSE SOLIDUS - 0x005D: 0x5A, # RIGHT SQUARE BRACKET - 0x005E: 0x5F, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x79, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 
0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0xC0, # LEFT CURLY BRACKET - 0x007C: 0x6A, # VERTICAL LINE - 0x007D: 0xD0, # RIGHT CURLY BRACKET - 0x007E: 0xA1, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # CONTROL - 0x0081: 0x21, # CONTROL - 0x0082: 0x22, # CONTROL - 0x0083: 0x23, # CONTROL - 0x0084: 0x24, # CONTROL - 0x0085: 0x15, # CONTROL - 0x0086: 0x06, # CONTROL - 0x0087: 0x17, # CONTROL - 0x0088: 0x28, # CONTROL - 0x0089: 0x29, # CONTROL - 0x008A: 0x2A, # CONTROL - 0x008B: 0x2B, # CONTROL - 0x008C: 0x2C, # CONTROL - 0x008D: 0x09, # CONTROL - 0x008E: 0x0A, # CONTROL - 0x008F: 0x1B, # CONTROL - 0x0090: 0x30, # CONTROL - 0x0091: 0x31, # CONTROL - 0x0092: 0x1A, # CONTROL - 0x0093: 0x33, # CONTROL - 0x0094: 0x34, # CONTROL - 0x0095: 0x35, # CONTROL - 0x0096: 0x36, # CONTROL - 0x0097: 0x08, # CONTROL - 0x0098: 0x38, # CONTROL - 0x0099: 0x39, # CONTROL - 0x009A: 0x3A, # CONTROL - 0x009B: 0x3B, # CONTROL - 0x009C: 0x04, # CONTROL - 0x009D: 0x14, # CONTROL - 0x009E: 0x3E, # CONTROL - 0x009F: 0xFF, # CONTROL - 0x00A0: 0x74, # NO-BREAK SPACE - 0x00A3: 0xB0, # POUND SIGN - 0x00A6: 0xDF, # BROKEN BAR - 0x00A7: 0xEB, # SECTION SIGN - 0x00A8: 0x70, # DIAERESIS - 0x00A9: 0xFB, # COPYRIGHT SIGN - 0x00AB: 0xEE, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xEF, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0xDA, # PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 0xA0, # ACUTE ACCENT - 0x00BB: 0xFE, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BD: 0xDB, # VULGAR FRACTION ONE HALF - 0x0385: 0x80, # GREEK DIALYTIKA TONOS - 0x0386: 0x71, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0387: 0xDD, # GREEK ANO TELEIA - 0x0388: 
0x72, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0x73, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038A: 0x75, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038C: 0x76, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038E: 0x77, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038F: 0x78, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xCC, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0x41, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0x42, # GREEK CAPITAL LETTER BETA - 0x0393: 0x43, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0x44, # GREEK CAPITAL LETTER DELTA - 0x0395: 0x45, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0x46, # GREEK CAPITAL LETTER ZETA - 0x0397: 0x47, # GREEK CAPITAL LETTER ETA - 0x0398: 0x48, # GREEK CAPITAL LETTER THETA - 0x0399: 0x49, # GREEK CAPITAL LETTER IOTA - 0x039A: 0x51, # GREEK CAPITAL LETTER KAPPA - 0x039B: 0x52, # GREEK CAPITAL LETTER LAMDA - 0x039C: 0x53, # GREEK CAPITAL LETTER MU - 0x039D: 0x54, # GREEK CAPITAL LETTER NU - 0x039E: 0x55, # GREEK CAPITAL LETTER XI - 0x039F: 0x56, # GREEK CAPITAL LETTER OMICRON - 0x03A0: 0x57, # GREEK CAPITAL LETTER PI - 0x03A1: 0x58, # GREEK CAPITAL LETTER RHO - 0x03A3: 0x59, # GREEK CAPITAL LETTER SIGMA - 0x03A4: 0x62, # GREEK CAPITAL LETTER TAU - 0x03A5: 0x63, # GREEK CAPITAL LETTER UPSILON - 0x03A6: 0x64, # GREEK CAPITAL LETTER PHI - 0x03A7: 0x65, # GREEK CAPITAL LETTER CHI - 0x03A8: 0x66, # GREEK CAPITAL LETTER PSI - 0x03A9: 0x67, # GREEK CAPITAL LETTER OMEGA - 0x03AA: 0x68, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03AB: 0x69, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03AC: 0xB1, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03AD: 0xB2, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03AE: 0xB3, # GREEK SMALL LETTER ETA WITH TONOS - 0x03AF: 0xB5, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03B0: 0xCD, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03B1: 0x8A, # GREEK SMALL LETTER ALPHA - 0x03B2: 0x8B, # GREEK SMALL LETTER BETA - 0x03B3: 0x8C, # GREEK SMALL LETTER GAMMA - 0x03B4: 
0x8D, # GREEK SMALL LETTER DELTA - 0x03B5: 0x8E, # GREEK SMALL LETTER EPSILON - 0x03B6: 0x8F, # GREEK SMALL LETTER ZETA - 0x03B7: 0x9A, # GREEK SMALL LETTER ETA - 0x03B8: 0x9B, # GREEK SMALL LETTER THETA - 0x03B9: 0x9C, # GREEK SMALL LETTER IOTA - 0x03BA: 0x9D, # GREEK SMALL LETTER KAPPA - 0x03BB: 0x9E, # GREEK SMALL LETTER LAMDA - 0x03BC: 0x9F, # GREEK SMALL LETTER MU - 0x03BD: 0xAA, # GREEK SMALL LETTER NU - 0x03BE: 0xAB, # GREEK SMALL LETTER XI - 0x03BF: 0xAC, # GREEK SMALL LETTER OMICRON - 0x03C0: 0xAD, # GREEK SMALL LETTER PI - 0x03C1: 0xAE, # GREEK SMALL LETTER RHO - 0x03C2: 0xBA, # GREEK SMALL LETTER FINAL SIGMA - 0x03C3: 0xAF, # GREEK SMALL LETTER SIGMA - 0x03C4: 0xBB, # GREEK SMALL LETTER TAU - 0x03C5: 0xBC, # GREEK SMALL LETTER UPSILON - 0x03C6: 0xBD, # GREEK SMALL LETTER PHI - 0x03C7: 0xBE, # GREEK SMALL LETTER CHI - 0x03C8: 0xBF, # GREEK SMALL LETTER PSI - 0x03C9: 0xCB, # GREEK SMALL LETTER OMEGA - 0x03CA: 0xB4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03CB: 0xB8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03CC: 0xB6, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03CD: 0xB7, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03CE: 0xB9, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2015: 0xCF, # HORIZONTAL BAR - 0x2018: 0xCE, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xDE, # RIGHT SINGLE QUOTATION MARK + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x37, # END OF TRANSMISSION + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, # ACKNOWLEDGE + 0x0007: 0x2F, # BELL + 0x0008: 0x16, # BACKSPACE + 0x0009: 0x05, # HORIZONTAL TABULATION + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x3C, # DEVICE CONTROL 
FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x32, # SYNCHRONOUS IDLE + 0x0017: 0x26, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: None, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x40, # SPACE + 0x0021: 0x4F, # EXCLAMATION MARK + 0x0022: 0x7F, # QUOTATION MARK + 0x0023: 0x7B, # NUMBER SIGN + 0x0024: 0x5B, # DOLLAR SIGN + 0x0025: 0x6C, # PERCENT SIGN + 0x0026: 0x50, # AMPERSAND + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0x7C, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B + 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, # LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 0x0050: 0xD7, # LATIN CAPITAL 
LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # LATIN CAPITAL LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0x4A, # LEFT SQUARE BRACKET + 0x005C: 0xE0, # REVERSE SOLIDUS + 0x005D: 0x5A, # RIGHT SQUARE BRACKET + 0x005E: 0x5F, # CIRCUMFLEX ACCENT + 0x005F: 0x6D, # LOW LINE + 0x0060: 0x79, # GRAVE ACCENT + 0x0061: 0x81, # LATIN SMALL LETTER A + 0x0062: 0x82, # LATIN SMALL LETTER B + 0x0063: 0x83, # LATIN SMALL LETTER C + 0x0064: 0x84, # LATIN SMALL LETTER D + 0x0065: 0x85, # LATIN SMALL LETTER E + 0x0066: 0x86, # LATIN SMALL LETTER F + 0x0067: 0x87, # LATIN SMALL LETTER G + 0x0068: 0x88, # LATIN SMALL LETTER H + 0x0069: 0x89, # LATIN SMALL LETTER I + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O + 0x0070: 0x97, # LATIN SMALL LETTER P + 0x0071: 0x98, # LATIN SMALL LETTER Q + 0x0072: 0x99, # LATIN SMALL LETTER R + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, # LATIN SMALL LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, # LATIN SMALL LETTER Z + 0x007B: 0xC0, # LEFT CURLY BRACKET + 0x007C: 0x6A, # VERTICAL LINE + 0x007D: 0xD0, # RIGHT CURLY BRACKET + 0x007E: 0xA1, # TILDE + 0x007F: 0x07, # DELETE + 0x0080: 0x20, # CONTROL + 0x0081: 0x21, # CONTROL + 0x0082: 0x22, # CONTROL + 0x0083: 0x23, # CONTROL + 0x0084: 0x24, # CONTROL + 0x0085: 0x15, # CONTROL + 0x0086: 0x06, # CONTROL + 0x0087: 0x17, # 
CONTROL + 0x0088: 0x28, # CONTROL + 0x0089: 0x29, # CONTROL + 0x008A: 0x2A, # CONTROL + 0x008B: 0x2B, # CONTROL + 0x008C: 0x2C, # CONTROL + 0x008D: 0x09, # CONTROL + 0x008E: 0x0A, # CONTROL + 0x008F: 0x1B, # CONTROL + 0x0090: 0x30, # CONTROL + 0x0091: 0x31, # CONTROL + 0x0092: 0x1A, # CONTROL + 0x0093: 0x33, # CONTROL + 0x0094: 0x34, # CONTROL + 0x0095: 0x35, # CONTROL + 0x0096: 0x36, # CONTROL + 0x0097: 0x08, # CONTROL + 0x0098: 0x38, # CONTROL + 0x0099: 0x39, # CONTROL + 0x009A: 0x3A, # CONTROL + 0x009B: 0x3B, # CONTROL + 0x009C: 0x04, # CONTROL + 0x009D: 0x14, # CONTROL + 0x009E: 0x3E, # CONTROL + 0x009F: 0xFF, # CONTROL + 0x00A0: 0x74, # NO-BREAK SPACE + 0x00A3: 0xB0, # POUND SIGN + 0x00A6: 0xDF, # BROKEN BAR + 0x00A7: 0xEB, # SECTION SIGN + 0x00A8: 0x70, # DIAERESIS + 0x00A9: 0xFB, # COPYRIGHT SIGN + 0x00AB: 0xEE, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xEF, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0xDA, # PLUS-MINUS SIGN + 0x00B2: 0xEA, # SUPERSCRIPT TWO + 0x00B3: 0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xA0, # ACUTE ACCENT + 0x00BB: 0xFE, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BD: 0xDB, # VULGAR FRACTION ONE HALF + 0x0385: 0x80, # GREEK DIALYTIKA TONOS + 0x0386: 0x71, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0387: 0xDD, # GREEK ANO TELEIA + 0x0388: 0x72, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x73, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038A: 0x75, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038C: 0x76, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038E: 0x77, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038F: 0x78, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0xCC, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0x41, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x42, # GREEK CAPITAL LETTER BETA + 0x0393: 0x43, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x44, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x45, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x46, # GREEK CAPITAL 
LETTER ZETA + 0x0397: 0x47, # GREEK CAPITAL LETTER ETA + 0x0398: 0x48, # GREEK CAPITAL LETTER THETA + 0x0399: 0x49, # GREEK CAPITAL LETTER IOTA + 0x039A: 0x51, # GREEK CAPITAL LETTER KAPPA + 0x039B: 0x52, # GREEK CAPITAL LETTER LAMDA + 0x039C: 0x53, # GREEK CAPITAL LETTER MU + 0x039D: 0x54, # GREEK CAPITAL LETTER NU + 0x039E: 0x55, # GREEK CAPITAL LETTER XI + 0x039F: 0x56, # GREEK CAPITAL LETTER OMICRON + 0x03A0: 0x57, # GREEK CAPITAL LETTER PI + 0x03A1: 0x58, # GREEK CAPITAL LETTER RHO + 0x03A3: 0x59, # GREEK CAPITAL LETTER SIGMA + 0x03A4: 0x62, # GREEK CAPITAL LETTER TAU + 0x03A5: 0x63, # GREEK CAPITAL LETTER UPSILON + 0x03A6: 0x64, # GREEK CAPITAL LETTER PHI + 0x03A7: 0x65, # GREEK CAPITAL LETTER CHI + 0x03A8: 0x66, # GREEK CAPITAL LETTER PSI + 0x03A9: 0x67, # GREEK CAPITAL LETTER OMEGA + 0x03AA: 0x68, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03AB: 0x69, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03AC: 0xB1, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03AD: 0xB2, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03AE: 0xB3, # GREEK SMALL LETTER ETA WITH TONOS + 0x03AF: 0xB5, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03B0: 0xCD, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03B1: 0x8A, # GREEK SMALL LETTER ALPHA + 0x03B2: 0x8B, # GREEK SMALL LETTER BETA + 0x03B3: 0x8C, # GREEK SMALL LETTER GAMMA + 0x03B4: 0x8D, # GREEK SMALL LETTER DELTA + 0x03B5: 0x8E, # GREEK SMALL LETTER EPSILON + 0x03B6: 0x8F, # GREEK SMALL LETTER ZETA + 0x03B7: 0x9A, # GREEK SMALL LETTER ETA + 0x03B8: 0x9B, # GREEK SMALL LETTER THETA + 0x03B9: 0x9C, # GREEK SMALL LETTER IOTA + 0x03BA: 0x9D, # GREEK SMALL LETTER KAPPA + 0x03BB: 0x9E, # GREEK SMALL LETTER LAMDA + 0x03BC: 0x9F, # GREEK SMALL LETTER MU + 0x03BD: 0xAA, # GREEK SMALL LETTER NU + 0x03BE: 0xAB, # GREEK SMALL LETTER XI + 0x03BF: 0xAC, # GREEK SMALL LETTER OMICRON + 0x03C0: 0xAD, # GREEK SMALL LETTER PI + 0x03C1: 0xAE, # GREEK SMALL LETTER RHO + 0x03C2: 0xBA, # GREEK SMALL LETTER FINAL SIGMA + 0x03C3: 0xAF, # GREEK 
SMALL LETTER SIGMA + 0x03C4: 0xBB, # GREEK SMALL LETTER TAU + 0x03C5: 0xBC, # GREEK SMALL LETTER UPSILON + 0x03C6: 0xBD, # GREEK SMALL LETTER PHI + 0x03C7: 0xBE, # GREEK SMALL LETTER CHI + 0x03C8: 0xBF, # GREEK SMALL LETTER PSI + 0x03C9: 0xCB, # GREEK SMALL LETTER OMEGA + 0x03CA: 0xB4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03CB: 0xB8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03CC: 0xB6, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03CD: 0xB7, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03CE: 0xB9, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0xCF, # HORIZONTAL BAR + 0x2018: 0xCE, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xDE, # RIGHT SINGLE QUOTATION MARK } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_1.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_1.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_1.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 
0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA 
-> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) - 
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 
-> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA 
-> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) + 
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # 
ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # 
LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 
0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, # LATIN 
CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH (Icelandic) - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN (Icelandic) - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 
0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0xF0, # LATIN SMALL LETTER ETH (Icelandic) - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0xFE, # LATIN SMALL LETTER THORN (Icelandic) - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 
0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # 
+ 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN 
CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH (Icelandic) + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN (Icelandic) + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN 
SMALL LETTER I WITH DIAERESIS + 0x00F0: 0xF0, # LATIN SMALL LETTER ETH (Icelandic) + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0xFE, # LATIN SMALL LETTER THORN (Icelandic) + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_10.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_10.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_10.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - 
u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0112' # 0xA2 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0122' # 0xA3 -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u012a' # 0xA4 -> LATIN CAPITAL LETTER I WITH MACRON - u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE - u'\u0136' # 0xA6 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u013b' # 0xA8 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0110' # 0xA9 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0160' # 0xAA -> LATIN CAPITAL LETTER S WITH CARON - u'\u0166' # 0xAB -> LATIN CAPITAL LETTER T WITH STROKE - u'\u017d' # 0xAC -> LATIN CAPITAL LETTER Z WITH CARON - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u016a' # 0xAE -> LATIN CAPITAL LETTER U WITH MACRON - u'\u014a' # 0xAF -> LATIN CAPITAL LETTER ENG - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK 
- u'\u0113' # 0xB2 -> LATIN SMALL LETTER E WITH MACRON - u'\u0123' # 0xB3 -> LATIN SMALL LETTER G WITH CEDILLA - u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON - u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE - u'\u0137' # 0xB6 -> LATIN SMALL LETTER K WITH CEDILLA - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u013c' # 0xB8 -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0111' # 0xB9 -> LATIN SMALL LETTER D WITH STROKE - u'\u0161' # 0xBA -> LATIN SMALL LETTER S WITH CARON - u'\u0167' # 0xBB -> LATIN SMALL LETTER T WITH STROKE - u'\u017e' # 0xBC -> LATIN SMALL LETTER Z WITH CARON - u'\u2015' # 0xBD -> HORIZONTAL BAR - u'\u016b' # 0xBE -> LATIN SMALL LETTER U WITH MACRON - u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG - u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) - u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O 
WITH DIAERESIS - u'\u0168' # 0xD7 -> LATIN CAPITAL LETTER U WITH TILDE - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) - u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) - u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u0169' # 0xF7 -> LATIN SMALL LETTER U WITH TILDE - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 
0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) - u'\u0138' # 0xFF -> LATIN SMALL LETTER KRA + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0112' # 0xA2 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0122' # 0xA3 -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u012a' # 0xA4 -> LATIN CAPITAL LETTER I WITH MACRON + u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE + u'\u0136' # 0xA6 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u013b' # 0xA8 -> LATIN 
CAPITAL LETTER L WITH CEDILLA + u'\u0110' # 0xA9 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0160' # 0xAA -> LATIN CAPITAL LETTER S WITH CARON + u'\u0166' # 0xAB -> LATIN CAPITAL LETTER T WITH STROKE + u'\u017d' # 0xAC -> LATIN CAPITAL LETTER Z WITH CARON + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u016a' # 0xAE -> LATIN CAPITAL LETTER U WITH MACRON + u'\u014a' # 0xAF -> LATIN CAPITAL LETTER ENG + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u0113' # 0xB2 -> LATIN SMALL LETTER E WITH MACRON + u'\u0123' # 0xB3 -> LATIN SMALL LETTER G WITH CEDILLA + u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON + u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE + u'\u0137' # 0xB6 -> LATIN SMALL LETTER K WITH CEDILLA + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u013c' # 0xB8 -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0111' # 0xB9 -> LATIN SMALL LETTER D WITH STROKE + u'\u0161' # 0xBA -> LATIN SMALL LETTER S WITH CARON + u'\u0167' # 0xBB -> LATIN SMALL LETTER T WITH STROKE + u'\u017e' # 0xBC -> LATIN SMALL LETTER Z WITH CARON + u'\u2015' # 0xBD -> HORIZONTAL BAR + u'\u016b' # 0xBE -> LATIN SMALL LETTER U WITH MACRON + u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG + u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> 
LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) + u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u0168' # 0xD7 -> LATIN CAPITAL LETTER U WITH TILDE + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) + u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) + u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u014d' # 0xF2 
-> LATIN SMALL LETTER O WITH MACRON + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u0169' # 0xF7 -> LATIN SMALL LETTER U WITH TILDE + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) + u'\u0138' # 0xFF -> LATIN SMALL LETTER KRA ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 
0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # 
CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A7: 0xA7, # SECTION SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00C1: 0xC1, # LATIN 
CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH (Icelandic) - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN (Icelandic) - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0xF0, # LATIN SMALL LETTER ETH (Icelandic) - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0xFE, # LATIN SMALL LETTER THORN (Icelandic) - 0x0100: 0xC0, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xE0, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xA9, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xB9, # LATIN SMALL LETTER D WITH STROKE - 0x0112: 0xA2, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xB2, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xCC, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xEC, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xA3, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xB3, # LATIN SMALL LETTER G WITH CEDILLA - 0x0128: 0xA5, # LATIN CAPITAL LETTER I WITH TILDE - 0x0129: 0xB5, # LATIN SMALL LETTER I WITH TILDE - 0x012A: 0xA4, # LATIN CAPITAL LETTER I WITH MACRON - 0x012B: 0xB4, # LATIN SMALL LETTER I WITH MACRON - 0x012E: 0xC7, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012F: 0xE7, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xA6, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xB6, # LATIN SMALL LETTER K WITH CEDILLA - 0x0138: 0xFF, # LATIN SMALL LETTER KRA - 0x013B: 0xA8, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013C: 0xB8, # LATIN SMALL LETTER L WITH CEDILLA - 0x0145: 0xD1, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xF1, # LATIN SMALL LETTER N WITH CEDILLA - 0x014A: 0xAF, # LATIN CAPITAL LETTER ENG - 0x014B: 0xBF, # LATIN SMALL LETTER ENG - 0x014C: 0xD2, # LATIN CAPITAL LETTER O 
WITH MACRON - 0x014D: 0xF2, # LATIN SMALL LETTER O WITH MACRON - 0x0160: 0xAA, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xBA, # LATIN SMALL LETTER S WITH CARON - 0x0166: 0xAB, # LATIN CAPITAL LETTER T WITH STROKE - 0x0167: 0xBB, # LATIN SMALL LETTER T WITH STROKE - 0x0168: 0xD7, # LATIN CAPITAL LETTER U WITH TILDE - 0x0169: 0xF7, # LATIN SMALL LETTER U WITH TILDE - 0x016A: 0xAE, # LATIN CAPITAL LETTER U WITH MACRON - 0x016B: 0xBE, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0xD9, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xF9, # LATIN SMALL LETTER U WITH OGONEK - 0x017D: 0xAC, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xBC, # LATIN SMALL LETTER Z WITH CARON - 0x2015: 0xBD, # HORIZONTAL BAR + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 
0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # 
GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A7: 0xA7, # SECTION SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL 
LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH (Icelandic) + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN (Icelandic) + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0xF0, # LATIN SMALL LETTER ETH (Icelandic) + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F8: 
0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0xFE, # LATIN SMALL LETTER THORN (Icelandic) + 0x0100: 0xC0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0xE0, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0xA9, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xB9, # LATIN SMALL LETTER D WITH STROKE + 0x0112: 0xA2, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0xB2, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0xCC, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0xEC, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0xA3, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0xB3, # LATIN SMALL LETTER G WITH CEDILLA + 0x0128: 0xA5, # LATIN CAPITAL LETTER I WITH TILDE + 0x0129: 0xB5, # LATIN SMALL LETTER I WITH TILDE + 0x012A: 0xA4, # LATIN CAPITAL LETTER I WITH MACRON + 0x012B: 0xB4, # LATIN SMALL LETTER I WITH MACRON + 0x012E: 0xC7, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012F: 0xE7, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0xA6, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0xB6, # LATIN SMALL LETTER K WITH CEDILLA + 0x0138: 0xFF, # LATIN SMALL LETTER KRA + 0x013B: 0xA8, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013C: 0xB8, # LATIN SMALL LETTER L WITH CEDILLA + 0x0145: 0xD1, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0xF1, # LATIN SMALL LETTER N WITH CEDILLA + 0x014A: 0xAF, # LATIN CAPITAL LETTER ENG + 0x014B: 0xBF, # LATIN SMALL LETTER ENG + 0x014C: 0xD2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014D: 0xF2, # LATIN SMALL LETTER O WITH MACRON 
+ 0x0160: 0xAA, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xBA, # LATIN SMALL LETTER S WITH CARON + 0x0166: 0xAB, # LATIN CAPITAL LETTER T WITH STROKE + 0x0167: 0xBB, # LATIN SMALL LETTER T WITH STROKE + 0x0168: 0xD7, # LATIN CAPITAL LETTER U WITH TILDE + 0x0169: 0xF7, # LATIN SMALL LETTER U WITH TILDE + 0x016A: 0xAE, # LATIN CAPITAL LETTER U WITH MACRON + 0x016B: 0xBE, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0xD9, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xF9, # LATIN SMALL LETTER U WITH OGONEK + 0x017D: 0xAC, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xBC, # LATIN SMALL LETTER Z WITH CARON + 0x2015: 0xBD, # HORIZONTAL BAR } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_11.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_11.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_11.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,258 +32,258 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF 
TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING - u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO - u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING - u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN - 
u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK - u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA - u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN - u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA - u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK - u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xC4 -> THAI CHARACTER RU - u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xC6 -> THAI CHARACTER LU - u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA - u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG - u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II - u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> 
BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO + u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING + u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK + u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN + 
u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA + u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA + u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN + u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA + u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0xC4 -> THAI CHARACTER RU + u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING + u'\u0e26' # 0xC6 -> THAI CHARACTER LU + u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA + u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP + u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG + u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A + u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I + u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II + u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U + u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' - u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE - u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xE3 -> THAI 
CHARACTER SARA AI MAIMUAN - u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO - u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN - u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN - u'\u0e50' # 0xF0 -> THAI DIGIT ZERO - u'\u0e51' # 0xF1 -> THAI DIGIT ONE - u'\u0e52' # 0xF2 -> THAI DIGIT TWO - u'\u0e53' # 0xF3 -> THAI DIGIT THREE - u'\u0e54' # 0xF4 -> THAI DIGIT FOUR - u'\u0e55' # 0xF5 -> THAI DIGIT FIVE - u'\u0e56' # 0xF6 -> THAI DIGIT SIX - u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xF9 -> THAI DIGIT NINE - u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT + u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E + u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O + u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN + u'\u0e50' # 0xF0 -> THAI DIGIT ZERO + u'\u0e51' # 0xF1 -> THAI DIGIT ONE + u'\u0e52' # 0xF2 -> THAI DIGIT TWO + u'\u0e53' # 0xF3 -> 
THAI DIGIT THREE + u'\u0e54' # 0xF4 -> THAI DIGIT FOUR + u'\u0e55' # 0xF5 -> THAI DIGIT FIVE + u'\u0e56' # 0xF6 -> THAI DIGIT SIX + u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN + u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT + u'\u0e59' # 0xF9 -> THAI DIGIT NINE + u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT u'\ufffe' u'\ufffe' u'\ufffe' @@ -293,253 +293,252 @@ ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 
0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 
0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x0E01: 0xA1, # THAI CHARACTER KO KAI - 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI - 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT - 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI - 0x0E05: 0xA5, # THAI CHARACTER KHO KHON - 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG - 0x0E07: 0xA7, # THAI CHARACTER NGO NGU - 0x0E08: 0xA8, # THAI CHARACTER CHO CHAN - 0x0E09: 0xA9, # THAI CHARACTER CHO CHING - 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG - 0x0E0B: 0xAB, # THAI CHARACTER SO SO - 0x0E0C: 0xAC, # THAI CHARACTER CHO 
CHOE - 0x0E0D: 0xAD, # THAI CHARACTER YO YING - 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA - 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK - 0x0E10: 0xB0, # THAI CHARACTER THO THAN - 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO - 0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO - 0x0E13: 0xB3, # THAI CHARACTER NO NEN - 0x0E14: 0xB4, # THAI CHARACTER DO DEK - 0x0E15: 0xB5, # THAI CHARACTER TO TAO - 0x0E16: 0xB6, # THAI CHARACTER THO THUNG - 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN - 0x0E18: 0xB8, # THAI CHARACTER THO THONG - 0x0E19: 0xB9, # THAI CHARACTER NO NU - 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI - 0x0E1B: 0xBB, # THAI CHARACTER PO PLA - 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG - 0x0E1D: 0xBD, # THAI CHARACTER FO FA - 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN - 0x0E1F: 0xBF, # THAI CHARACTER FO FAN - 0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO - 0x0E21: 0xC1, # THAI CHARACTER MO MA - 0x0E22: 0xC2, # THAI CHARACTER YO YAK - 0x0E23: 0xC3, # THAI CHARACTER RO RUA - 0x0E24: 0xC4, # THAI CHARACTER RU - 0x0E25: 0xC5, # THAI CHARACTER LO LING - 0x0E26: 0xC6, # THAI CHARACTER LU - 0x0E27: 0xC7, # THAI CHARACTER WO WAEN - 0x0E28: 0xC8, # THAI CHARACTER SO SALA - 0x0E29: 0xC9, # THAI CHARACTER SO RUSI - 0x0E2A: 0xCA, # THAI CHARACTER SO SUA - 0x0E2B: 0xCB, # THAI CHARACTER HO HIP - 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA - 0x0E2D: 0xCD, # THAI CHARACTER O ANG - 0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK - 0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI - 0x0E30: 0xD0, # THAI CHARACTER SARA A - 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT - 0x0E32: 0xD2, # THAI CHARACTER SARA AA - 0x0E33: 0xD3, # THAI CHARACTER SARA AM - 0x0E34: 0xD4, # THAI CHARACTER SARA I - 0x0E35: 0xD5, # THAI CHARACTER SARA II - 0x0E36: 0xD6, # THAI CHARACTER SARA UE - 0x0E37: 0xD7, # THAI CHARACTER SARA UEE - 0x0E38: 0xD8, # THAI CHARACTER SARA U - 0x0E39: 0xD9, # THAI CHARACTER SARA UU - 0x0E3A: 0xDA, # THAI CHARACTER PHINTHU - 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT - 0x0E40: 0xE0, # THAI CHARACTER SARA E - 0x0E41: 
0xE1, # THAI CHARACTER SARA AE - 0x0E42: 0xE2, # THAI CHARACTER SARA O - 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN - 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI - 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO - 0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK - 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU - 0x0E48: 0xE8, # THAI CHARACTER MAI EK - 0x0E49: 0xE9, # THAI CHARACTER MAI THO - 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI - 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA - 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT - 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT - 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN - 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN - 0x0E50: 0xF0, # THAI DIGIT ZERO - 0x0E51: 0xF1, # THAI DIGIT ONE - 0x0E52: 0xF2, # THAI DIGIT TWO - 0x0E53: 0xF3, # THAI DIGIT THREE - 0x0E54: 0xF4, # THAI DIGIT FOUR - 0x0E55: 0xF5, # THAI DIGIT FIVE - 0x0E56: 0xF6, # THAI DIGIT SIX - 0x0E57: 0xF7, # THAI DIGIT SEVEN - 0x0E58: 0xF8, # THAI DIGIT EIGHT - 0x0E59: 0xF9, # THAI DIGIT NINE - 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU - 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 
0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # 
LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 
0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x0E01: 0xA1, # THAI CHARACTER KO KAI + 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI + 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT + 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI + 0x0E05: 0xA5, # THAI CHARACTER KHO KHON + 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG + 0x0E07: 0xA7, # THAI CHARACTER NGO NGU + 0x0E08: 0xA8, # THAI CHARACTER CHO CHAN + 0x0E09: 0xA9, # THAI CHARACTER CHO CHING + 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG + 0x0E0B: 0xAB, # THAI CHARACTER SO SO + 0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE + 0x0E0D: 0xAD, # THAI CHARACTER YO YING + 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA + 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK + 0x0E10: 0xB0, # THAI CHARACTER THO THAN + 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO + 0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO + 0x0E13: 0xB3, # THAI CHARACTER NO NEN + 0x0E14: 0xB4, # THAI CHARACTER DO DEK + 0x0E15: 0xB5, # THAI CHARACTER TO TAO + 0x0E16: 0xB6, # THAI CHARACTER THO THUNG + 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN + 0x0E18: 0xB8, # THAI CHARACTER THO THONG + 0x0E19: 0xB9, # THAI CHARACTER NO NU + 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI + 0x0E1B: 0xBB, # THAI CHARACTER PO PLA + 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG + 0x0E1D: 0xBD, # THAI CHARACTER FO FA + 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN + 0x0E1F: 0xBF, # THAI CHARACTER FO FAN + 0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO + 0x0E21: 0xC1, # THAI CHARACTER MO MA + 0x0E22: 0xC2, # THAI CHARACTER YO YAK + 0x0E23: 0xC3, # THAI CHARACTER RO RUA + 0x0E24: 0xC4, # THAI CHARACTER RU + 0x0E25: 0xC5, # THAI CHARACTER LO LING + 0x0E26: 0xC6, # THAI CHARACTER LU + 0x0E27: 0xC7, # THAI CHARACTER WO WAEN + 0x0E28: 0xC8, # THAI CHARACTER SO SALA + 0x0E29: 0xC9, # THAI CHARACTER SO RUSI + 0x0E2A: 0xCA, # THAI CHARACTER SO SUA + 0x0E2B: 0xCB, # THAI CHARACTER HO HIP + 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA + 0x0E2D: 0xCD, # THAI CHARACTER O ANG + 
0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK + 0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI + 0x0E30: 0xD0, # THAI CHARACTER SARA A + 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT + 0x0E32: 0xD2, # THAI CHARACTER SARA AA + 0x0E33: 0xD3, # THAI CHARACTER SARA AM + 0x0E34: 0xD4, # THAI CHARACTER SARA I + 0x0E35: 0xD5, # THAI CHARACTER SARA II + 0x0E36: 0xD6, # THAI CHARACTER SARA UE + 0x0E37: 0xD7, # THAI CHARACTER SARA UEE + 0x0E38: 0xD8, # THAI CHARACTER SARA U + 0x0E39: 0xD9, # THAI CHARACTER SARA UU + 0x0E3A: 0xDA, # THAI CHARACTER PHINTHU + 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT + 0x0E40: 0xE0, # THAI CHARACTER SARA E + 0x0E41: 0xE1, # THAI CHARACTER SARA AE + 0x0E42: 0xE2, # THAI CHARACTER SARA O + 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN + 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI + 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO + 0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK + 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU + 0x0E48: 0xE8, # THAI CHARACTER MAI EK + 0x0E49: 0xE9, # THAI CHARACTER MAI THO + 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI + 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA + 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT + 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT + 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN + 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN + 0x0E50: 0xF0, # THAI DIGIT ZERO + 0x0E51: 0xF1, # THAI DIGIT ONE + 0x0E52: 0xF2, # THAI DIGIT TWO + 0x0E53: 0xF3, # THAI DIGIT THREE + 0x0E54: 0xF4, # THAI DIGIT FOUR + 0x0E55: 0xF5, # THAI DIGIT FIVE + 0x0E56: 0xF6, # THAI DIGIT SIX + 0x0E57: 0xF7, # THAI DIGIT SEVEN + 0x0E58: 0xF8, # THAI DIGIT EIGHT + 0x0E59: 0xF9, # THAI DIGIT NINE + 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU + 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_13.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_13.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_13.py Mon Jan 2 16:17:17 2006 @@ -15,7 
+15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u201d' # 0xA1 -> RIGHT DOUBLE QUOTATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA - 
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u201c' # 0xB4 -> LEFT DOUBLE QUOTATION MARK - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xe6' # 0xBF -> LATIN SMALL LETTER AE - u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON - u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\xd3' # 0xD3 -> LATIN CAPITAL 
LETTER O WITH ACUTE - u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) - u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK - u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK - u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON - u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK - u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE - u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA - u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON - u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0173' # 
0xF8 -> LATIN SMALL LETTER U WITH OGONEK - u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE - u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE - u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON - u'\u2019' # 0xFF -> RIGHT SINGLE QUOTATION MARK + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u201d' # 0xA1 -> RIGHT DOUBLE QUOTATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA + 
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u201c' # 0xB4 -> LEFT DOUBLE QUOTATION MARK + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xe6' # 0xBF -> LATIN SMALL LETTER AE + u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON + u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\xd3' # 0xD3 -> LATIN CAPITAL 
LETTER O WITH ACUTE + u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) + u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON + u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE + u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA + u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON + u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0173' # 
0xF8 -> LATIN SMALL LETTER U WITH OGONEK + u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE + u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE + u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON + u'\u2019' # 0xFF -> RIGHT SINGLE QUOTATION MARK ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT 
ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 
0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT 
THREE - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xAF, # LATIN CAPITAL LETTER AE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xA8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xBF, # LATIN SMALL LETTER AE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xB8, # LATIN SMALL LETTER O WITH STROKE - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0xC2, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xE2, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xC0, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xE0, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xC3, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE3, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x0112: 0xC7, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xE7, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xCB, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xEB, # LATIN SMALL LETTER E 
WITH DOT ABOVE - 0x0118: 0xC6, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xE6, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xCC, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xEC, # LATIN SMALL LETTER G WITH CEDILLA - 0x012A: 0xCE, # LATIN CAPITAL LETTER I WITH MACRON - 0x012B: 0xEE, # LATIN SMALL LETTER I WITH MACRON - 0x012E: 0xC1, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012F: 0xE1, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xCD, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xED, # LATIN SMALL LETTER K WITH CEDILLA - 0x013B: 0xCF, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013C: 0xEF, # LATIN SMALL LETTER L WITH CEDILLA - 0x0141: 0xD9, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xF9, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0xD2, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xF2, # LATIN SMALL LETTER N WITH CEDILLA - 0x014C: 0xD4, # LATIN CAPITAL LETTER O WITH MACRON - 0x014D: 0xF4, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0xAA, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xBA, # LATIN SMALL LETTER R WITH CEDILLA - 0x015A: 0xDA, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0xFA, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xD0, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xF0, # LATIN SMALL LETTER S WITH CARON - 0x016A: 0xDB, # LATIN CAPITAL LETTER U WITH MACRON - 0x016B: 0xFB, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0xD8, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xF8, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0xCA, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017A: 0xEA, # LATIN SMALL LETTER Z WITH ACUTE - 0x017B: 0xDD, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0xDE, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xFE, # LATIN SMALL LETTER Z WITH CARON - 0x2019: 0xFF, # RIGHT SINGLE QUOTATION MARK - 0x201C: 0xB4, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xA1, 
# RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xA5, # DOUBLE LOW-9 QUOTATION MARK + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 
0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 
0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xAF, # LATIN CAPITAL LETTER AE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xA8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xBF, # LATIN SMALL LETTER AE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xB8, # LATIN SMALL LETTER O WITH STROKE + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0xC2, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0xE2, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0xC0, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xE0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0xC3, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0xE3, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0xC7, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0xE7, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0xCB, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0xEB, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0xC6, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xE6, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0xCC, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0xEC, # LATIN SMALL LETTER G WITH CEDILLA + 0x012A: 0xCE, # LATIN CAPITAL LETTER I WITH MACRON + 0x012B: 0xEE, # LATIN SMALL LETTER I WITH MACRON + 0x012E: 0xC1, # LATIN CAPITAL LETTER I WITH OGONEK + 
0x012F: 0xE1, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0xCD, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0xED, # LATIN SMALL LETTER K WITH CEDILLA + 0x013B: 0xCF, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013C: 0xEF, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0xD9, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xF9, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0xD2, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0xF2, # LATIN SMALL LETTER N WITH CEDILLA + 0x014C: 0xD4, # LATIN CAPITAL LETTER O WITH MACRON + 0x014D: 0xF4, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0xAA, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0xBA, # LATIN SMALL LETTER R WITH CEDILLA + 0x015A: 0xDA, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0xFA, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0xD0, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xF0, # LATIN SMALL LETTER S WITH CARON + 0x016A: 0xDB, # LATIN CAPITAL LETTER U WITH MACRON + 0x016B: 0xFB, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0xD8, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xF8, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0xCA, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0xEA, # LATIN SMALL LETTER Z WITH ACUTE + 0x017B: 0xDD, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0xDE, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xFE, # LATIN SMALL LETTER Z WITH CARON + 0x2019: 0xFF, # RIGHT SINGLE QUOTATION MARK + 0x201C: 0xB4, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xA1, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xA5, # DOUBLE LOW-9 QUOTATION MARK } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_14.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_14.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_14.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 
@@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u1e02' # 0xA1 -> LATIN CAPITAL LETTER B WITH DOT ABOVE - u'\u1e03' # 0xA2 -> LATIN SMALL LETTER B WITH DOT ABOVE - u'\xa3' # 0xA3 -> POUND SIGN - u'\u010a' # 0xA4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE - u'\u010b' # 0xA5 -> LATIN SMALL LETTER C WITH DOT ABOVE - u'\u1e0a' # 0xA6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u1e80' # 0xA8 -> LATIN CAPITAL LETTER W 
WITH GRAVE - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u1e82' # 0xAA -> LATIN CAPITAL LETTER W WITH ACUTE - u'\u1e0b' # 0xAB -> LATIN SMALL LETTER D WITH DOT ABOVE - u'\u1ef2' # 0xAC -> LATIN CAPITAL LETTER Y WITH GRAVE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u0178' # 0xAF -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u1e1e' # 0xB0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE - u'\u1e1f' # 0xB1 -> LATIN SMALL LETTER F WITH DOT ABOVE - u'\u0120' # 0xB2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE - u'\u0121' # 0xB3 -> LATIN SMALL LETTER G WITH DOT ABOVE - u'\u1e40' # 0xB4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE - u'\u1e41' # 0xB5 -> LATIN SMALL LETTER M WITH DOT ABOVE - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\u1e56' # 0xB7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE - u'\u1e81' # 0xB8 -> LATIN SMALL LETTER W WITH GRAVE - u'\u1e57' # 0xB9 -> LATIN SMALL LETTER P WITH DOT ABOVE - u'\u1e83' # 0xBA -> LATIN SMALL LETTER W WITH ACUTE - u'\u1e60' # 0xBB -> LATIN CAPITAL LETTER S WITH DOT ABOVE - u'\u1ef3' # 0xBC -> LATIN SMALL LETTER Y WITH GRAVE - u'\u1e84' # 0xBD -> LATIN CAPITAL LETTER W WITH DIAERESIS - u'\u1e85' # 0xBE -> LATIN SMALL LETTER W WITH DIAERESIS - u'\u1e61' # 0xBF -> LATIN SMALL LETTER S WITH DOT ABOVE - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL 
LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0174' # 0xD0 -> LATIN CAPITAL LETTER W WITH CIRCUMFLEX - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u1e6a' # 0xD7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0176' # 0xDE -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0175' # 0xF0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE 
- u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u1e6b' # 0xF7 -> LATIN SMALL LETTER T WITH DOT ABOVE - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0177' # 0xFE -> LATIN SMALL LETTER Y WITH CIRCUMFLEX - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' 
# 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + 
u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u1e02' # 0xA1 -> 
LATIN CAPITAL LETTER B WITH DOT ABOVE + u'\u1e03' # 0xA2 -> LATIN SMALL LETTER B WITH DOT ABOVE + u'\xa3' # 0xA3 -> POUND SIGN + u'\u010a' # 0xA4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE + u'\u010b' # 0xA5 -> LATIN SMALL LETTER C WITH DOT ABOVE + u'\u1e0a' # 0xA6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u1e80' # 0xA8 -> LATIN CAPITAL LETTER W WITH GRAVE + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u1e82' # 0xAA -> LATIN CAPITAL LETTER W WITH ACUTE + u'\u1e0b' # 0xAB -> LATIN SMALL LETTER D WITH DOT ABOVE + u'\u1ef2' # 0xAC -> LATIN CAPITAL LETTER Y WITH GRAVE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u0178' # 0xAF -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u1e1e' # 0xB0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE + u'\u1e1f' # 0xB1 -> LATIN SMALL LETTER F WITH DOT ABOVE + u'\u0120' # 0xB2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE + u'\u0121' # 0xB3 -> LATIN SMALL LETTER G WITH DOT ABOVE + u'\u1e40' # 0xB4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE + u'\u1e41' # 0xB5 -> LATIN SMALL LETTER M WITH DOT ABOVE + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\u1e56' # 0xB7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE + u'\u1e81' # 0xB8 -> LATIN SMALL LETTER W WITH GRAVE + u'\u1e57' # 0xB9 -> LATIN SMALL LETTER P WITH DOT ABOVE + u'\u1e83' # 0xBA -> LATIN SMALL LETTER W WITH ACUTE + u'\u1e60' # 0xBB -> LATIN CAPITAL LETTER S WITH DOT ABOVE + u'\u1ef3' # 0xBC -> LATIN SMALL LETTER Y WITH GRAVE + u'\u1e84' # 0xBD -> LATIN CAPITAL LETTER W WITH DIAERESIS + u'\u1e85' # 0xBE -> LATIN SMALL LETTER W WITH DIAERESIS + u'\u1e61' # 0xBF -> LATIN SMALL LETTER S WITH DOT ABOVE + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN 
CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0174' # 0xD0 -> LATIN CAPITAL LETTER W WITH CIRCUMFLEX + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u1e6a' # 0xD7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0176' # 0xDE -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH 
CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0175' # 0xF0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u1e6b' # 0xF7 -> LATIN SMALL LETTER T WITH DOT ABOVE + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0177' # 0xFE -> LATIN SMALL LETTER Y WITH CIRCUMFLEX + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 
0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 
0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, 
# - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A7: 0xA7, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP 
S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x010A: 0xA4, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x010B: 0xA5, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x0120: 0xB2, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x0121: 0xB3, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x0174: 0xD0, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX - 0x0175: 0xF0, # LATIN SMALL LETTER W WITH CIRCUMFLEX - 0x0176: 0xDE, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX - 0x0177: 0xFE, # LATIN SMALL LETTER Y WITH CIRCUMFLEX - 0x0178: 0xAF, # LATIN CAPITAL LETTER Y WITH DIAERESIS 
- 0x1E02: 0xA1, # LATIN CAPITAL LETTER B WITH DOT ABOVE - 0x1E03: 0xA2, # LATIN SMALL LETTER B WITH DOT ABOVE - 0x1E0A: 0xA6, # LATIN CAPITAL LETTER D WITH DOT ABOVE - 0x1E0B: 0xAB, # LATIN SMALL LETTER D WITH DOT ABOVE - 0x1E1E: 0xB0, # LATIN CAPITAL LETTER F WITH DOT ABOVE - 0x1E1F: 0xB1, # LATIN SMALL LETTER F WITH DOT ABOVE - 0x1E40: 0xB4, # LATIN CAPITAL LETTER M WITH DOT ABOVE - 0x1E41: 0xB5, # LATIN SMALL LETTER M WITH DOT ABOVE - 0x1E56: 0xB7, # LATIN CAPITAL LETTER P WITH DOT ABOVE - 0x1E57: 0xB9, # LATIN SMALL LETTER P WITH DOT ABOVE - 0x1E60: 0xBB, # LATIN CAPITAL LETTER S WITH DOT ABOVE - 0x1E61: 0xBF, # LATIN SMALL LETTER S WITH DOT ABOVE - 0x1E6A: 0xD7, # LATIN CAPITAL LETTER T WITH DOT ABOVE - 0x1E6B: 0xF7, # LATIN SMALL LETTER T WITH DOT ABOVE - 0x1E80: 0xA8, # LATIN CAPITAL LETTER W WITH GRAVE - 0x1E81: 0xB8, # LATIN SMALL LETTER W WITH GRAVE - 0x1E82: 0xAA, # LATIN CAPITAL LETTER W WITH ACUTE - 0x1E83: 0xBA, # LATIN SMALL LETTER W WITH ACUTE - 0x1E84: 0xBD, # LATIN CAPITAL LETTER W WITH DIAERESIS - 0x1E85: 0xBE, # LATIN SMALL LETTER W WITH DIAERESIS - 0x1EF2: 0xAC, # LATIN CAPITAL LETTER Y WITH GRAVE - 0x1EF3: 0xBC, # LATIN SMALL LETTER Y WITH GRAVE + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # 
CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 
0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 
0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A7: 0xA7, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 
0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x010A: 0xA4, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x010B: 0xA5, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x0120: 0xB2, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x0121: 0xB3, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x0174: 0xD0, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX + 0x0175: 0xF0, # LATIN SMALL LETTER W WITH CIRCUMFLEX + 0x0176: 0xDE, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + 0x0177: 0xFE, # LATIN SMALL LETTER Y WITH CIRCUMFLEX + 0x0178: 0xAF, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 
0x1E02: 0xA1, # LATIN CAPITAL LETTER B WITH DOT ABOVE + 0x1E03: 0xA2, # LATIN SMALL LETTER B WITH DOT ABOVE + 0x1E0A: 0xA6, # LATIN CAPITAL LETTER D WITH DOT ABOVE + 0x1E0B: 0xAB, # LATIN SMALL LETTER D WITH DOT ABOVE + 0x1E1E: 0xB0, # LATIN CAPITAL LETTER F WITH DOT ABOVE + 0x1E1F: 0xB1, # LATIN SMALL LETTER F WITH DOT ABOVE + 0x1E40: 0xB4, # LATIN CAPITAL LETTER M WITH DOT ABOVE + 0x1E41: 0xB5, # LATIN SMALL LETTER M WITH DOT ABOVE + 0x1E56: 0xB7, # LATIN CAPITAL LETTER P WITH DOT ABOVE + 0x1E57: 0xB9, # LATIN SMALL LETTER P WITH DOT ABOVE + 0x1E60: 0xBB, # LATIN CAPITAL LETTER S WITH DOT ABOVE + 0x1E61: 0xBF, # LATIN SMALL LETTER S WITH DOT ABOVE + 0x1E6A: 0xD7, # LATIN CAPITAL LETTER T WITH DOT ABOVE + 0x1E6B: 0xF7, # LATIN SMALL LETTER T WITH DOT ABOVE + 0x1E80: 0xA8, # LATIN CAPITAL LETTER W WITH GRAVE + 0x1E81: 0xB8, # LATIN SMALL LETTER W WITH GRAVE + 0x1E82: 0xAA, # LATIN CAPITAL LETTER W WITH ACUTE + 0x1E83: 0xBA, # LATIN SMALL LETTER W WITH ACUTE + 0x1E84: 0xBD, # LATIN CAPITAL LETTER W WITH DIAERESIS + 0x1E85: 0xBE, # LATIN SMALL LETTER W WITH DIAERESIS + 0x1EF2: 0xAC, # LATIN CAPITAL LETTER Y WITH GRAVE + 0x1EF3: 0xBC, # LATIN SMALL LETTER Y WITH GRAVE } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_15.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_15.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_15.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' 
# 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\u20ac' # 0xA4 -> EURO SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u017e' 
# 0xB8 -> LATIN SMALL LETTER Z WITH CARON - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE - u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN 
CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + 
u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\u20ac' # 0xA4 -> EURO SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u017e' 
# 0xB8 -> LATIN SMALL LETTER Z WITH CARON + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE + u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN 
CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 
0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # 
LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 
0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A7: 0xA7, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E 
WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0xF0, # LATIN SMALL LETTER ETH - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0xFE, # LATIN SMALL LETTER THORN - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0152: 0xBC, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xBD, # LATIN SMALL LIGATURE OE - 0x0160: 0xA6, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xA8, # LATIN SMALL LETTER S WITH CARON - 0x0178: 0xBE, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x017D: 0xB4, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xB8, # LATIN SMALL LETTER Z WITH CARON - 0x20AC: 0xA4, # EURO SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, 
# DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # 
LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 
0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A7: 0xA7, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN 
CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0xF0, # LATIN SMALL LETTER ETH + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL 
LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0xFE, # LATIN SMALL LETTER THORN + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0152: 0xBC, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xBD, # LATIN SMALL LIGATURE OE + 0x0160: 0xA6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xA8, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0xBE, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x017D: 0xB4, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xB8, # LATIN SMALL LETTER Z WITH CARON + 0x20AC: 0xA4, # EURO SIGN } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_16.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_16.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_16.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> 
DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0105' # 0xA2 -> LATIN SMALL LETTER A WITH OGONEK - u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u20ac' # 0xA4 -> EURO SIGN - u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK - u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0218' # 0xAA -> LATIN CAPITAL LETTER S WITH COMMA BELOW - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u017a' # 0xAE -> LATIN SMALL LETTER Z WITH ACUTE - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u010c' # 0xB2 -> LATIN CAPITAL LETTER C WITH CARON - 
u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE - u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON - u'\u201d' # 0xB5 -> RIGHT DOUBLE QUOTATION MARK - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON - u'\u010d' # 0xB9 -> LATIN SMALL LETTER C WITH CARON - u'\u0219' # 0xBA -> LATIN SMALL LETTER S WITH COMMA BELOW - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE - u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0106' # 0xC5 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u015a' # 0xD7 -> LATIN CAPITAL LETTER S 
WITH ACUTE - u'\u0170' # 0xD8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0118' # 0xDD -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u0107' # 0xE5 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u015b' # 0xF7 -> LATIN SMALL LETTER S WITH ACUTE - u'\u0171' # 0xF8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC 
-> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0119' # 0xFD -> LATIN SMALL LETTER E WITH OGONEK - u'\u021b' # 0xFE -> LATIN SMALL LETTER T WITH COMMA BELOW - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0105' # 0xA2 -> LATIN SMALL LETTER A WITH OGONEK + u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u20ac' # 0xA4 -> EURO SIGN + u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK + u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON + u'\xa9' # 
0xA9 -> COPYRIGHT SIGN + u'\u0218' # 0xAA -> LATIN CAPITAL LETTER S WITH COMMA BELOW + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u017a' # 0xAE -> LATIN SMALL LETTER Z WITH ACUTE + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u010c' # 0xB2 -> LATIN CAPITAL LETTER C WITH CARON + u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE + u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON + u'\u201d' # 0xB5 -> RIGHT DOUBLE QUOTATION MARK + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON + u'\u010d' # 0xB9 -> LATIN SMALL LETTER C WITH CARON + u'\u0219' # 0xBA -> LATIN SMALL LETTER S WITH COMMA BELOW + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE + u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0106' # 0xC5 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN 
CAPITAL LETTER I WITH DIAERESIS + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u015a' # 0xD7 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u0170' # 0xD8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0118' # 0xDD -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0107' # 0xE5 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O 
WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u015b' # 0xF7 -> LATIN SMALL LETTER S WITH ACUTE + u'\u0171' # 0xF8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0119' # 0xFD -> LATIN SMALL LETTER E WITH OGONEK + u'\u021b' # 0xFE -> LATIN SMALL LETTER T WITH COMMA BELOW + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # 
APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 
0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A7: 0xA7, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00B0: 0xB0, # DEGREE SIGN - 
0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 
0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xA2, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xC5, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE5, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xB2, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xB9, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE - 0x0118: 0xDD, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xFD, # LATIN SMALL LETTER E WITH OGONEK - 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE - 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0152: 0xBC, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xBD, # LATIN SMALL LIGATURE OE - 0x015A: 0xD7, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0xF7, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xA6, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xA8, # LATIN SMALL LETTER S WITH CARON - 0x0170: 0xD8, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xF8, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0178: 0xBE, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0179: 0xAC, # LATIN 
CAPITAL LETTER Z WITH ACUTE - 0x017A: 0xAE, # LATIN SMALL LETTER Z WITH ACUTE - 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0xB4, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xB8, # LATIN SMALL LETTER Z WITH CARON - 0x0218: 0xAA, # LATIN CAPITAL LETTER S WITH COMMA BELOW - 0x0219: 0xBA, # LATIN SMALL LETTER S WITH COMMA BELOW - 0x021A: 0xDE, # LATIN CAPITAL LETTER T WITH COMMA BELOW - 0x021B: 0xFE, # LATIN SMALL LETTER T WITH COMMA BELOW - 0x201D: 0xB5, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xA5, # DOUBLE LOW-9 QUOTATION MARK - 0x20AC: 0xA4, # EURO SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 
0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL 
LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A7: 0xA7, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00BB: 
0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 
0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xA2, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0xC5, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0xE5, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0xB2, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xB9, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0xDD, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xFD, # LATIN SMALL LETTER E WITH OGONEK + 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE + 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0152: 0xBC, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xBD, # LATIN SMALL LIGATURE OE + 0x015A: 0xD7, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0xF7, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0xA6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xA8, # LATIN SMALL LETTER S WITH CARON + 0x0170: 0xD8, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0xF8, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0178: 0xBE, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0179: 0xAC, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0xAE, # LATIN SMALL LETTER Z WITH ACUTE + 0x017B: 0xAF, # LATIN 
CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0xB4, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xB8, # LATIN SMALL LETTER Z WITH CARON + 0x0218: 0xAA, # LATIN CAPITAL LETTER S WITH COMMA BELOW + 0x0219: 0xBA, # LATIN SMALL LETTER S WITH COMMA BELOW + 0x021A: 0xDE, # LATIN CAPITAL LETTER T WITH COMMA BELOW + 0x021B: 0xFE, # LATIN SMALL LETTER T WITH COMMA BELOW + 0x201D: 0xB5, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xA5, # DOUBLE LOW-9 QUOTATION MARK + 0x20AC: 0xA4, # EURO SIGN } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_2.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_2.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_2.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B 
-> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u02d8' # 0xA2 -> BREVE - u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u013d' # 0xA5 -> LATIN CAPITAL LETTER L WITH CARON - u'\u015a' # 0xA6 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u0164' # 0xAB -> LATIN CAPITAL LETTER T WITH CARON - u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK - u'\u02db' # 0xB2 -> OGONEK - u'\u0142' # 0xB3 -> LATIN SMALL LETTER L 
WITH STROKE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\u013e' # 0xB5 -> LATIN SMALL LETTER L WITH CARON - u'\u015b' # 0xB6 -> LATIN SMALL LETTER S WITH ACUTE - u'\u02c7' # 0xB7 -> CARON - u'\xb8' # 0xB8 -> CEDILLA - u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON - u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA - u'\u0165' # 0xBB -> LATIN SMALL LETTER T WITH CARON - u'\u017a' # 0xBC -> LATIN SMALL LETTER Z WITH ACUTE - u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT - u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON - u'\u016e' # 0xD9 
-> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE - u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON - u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH 
ACUTE - u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA - u'\u02d9' # 0xFF -> DOT ABOVE + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u02d8' # 0xA2 -> BREVE + u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u013d' # 0xA5 -> LATIN CAPITAL LETTER L WITH CARON + u'\u015a' # 0xA6 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON + 
u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u0164' # 0xAB -> LATIN CAPITAL LETTER T WITH CARON + u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u02db' # 0xB2 -> OGONEK + u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\u013e' # 0xB5 -> LATIN SMALL LETTER L WITH CARON + u'\u015b' # 0xB6 -> LATIN SMALL LETTER S WITH ACUTE + u'\u02c7' # 0xB7 -> CARON + u'\xb8' # 0xB8 -> CEDILLA + u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON + u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA + u'\u0165' # 0xBB -> LATIN SMALL LETTER T WITH CARON + u'\u017a' # 0xBC -> LATIN SMALL LETTER Z WITH ACUTE + u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT + u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER 
D WITH STROKE + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON + u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE + u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + 
u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON + u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA + u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, 
# PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN 
SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B8: 0xB8, # CEDILLA - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x010E: 0xCF, # LATIN CAPITAL LETTER D WITH CARON - 0x010F: 0xEF, # LATIN SMALL LETTER D 
WITH CARON - 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE - 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK - 0x011A: 0xCC, # LATIN CAPITAL LETTER E WITH CARON - 0x011B: 0xEC, # LATIN SMALL LETTER E WITH CARON - 0x0139: 0xC5, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013A: 0xE5, # LATIN SMALL LETTER L WITH ACUTE - 0x013D: 0xA5, # LATIN CAPITAL LETTER L WITH CARON - 0x013E: 0xB5, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE - 0x0147: 0xD2, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0xF2, # LATIN SMALL LETTER N WITH CARON - 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0xC0, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0xE0, # LATIN SMALL LETTER R WITH ACUTE - 0x0158: 0xD8, # LATIN CAPITAL LETTER R WITH CARON - 0x0159: 0xF8, # LATIN SMALL LETTER R WITH CARON - 0x015A: 0xA6, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0xB6, # LATIN SMALL LETTER S WITH ACUTE - 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON - 0x0162: 0xDE, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x0163: 0xFE, # LATIN SMALL LETTER T WITH CEDILLA - 0x0164: 0xAB, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0xBB, # LATIN SMALL LETTER T WITH CARON - 0x016E: 0xD9, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x016F: 0xF9, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0xDB, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xFB, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0179: 0xAC, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017A: 0xBC, # LATIN SMALL LETTER Z WITH 
ACUTE - 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON - 0x02C7: 0xB7, # CARON - 0x02D8: 0xA2, # BREVE - 0x02D9: 0xFF, # DOT ABOVE - 0x02DB: 0xB2, # OGONEK - 0x02DD: 0xBD, # DOUBLE ACUTE ACCENT + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 
0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER 
I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B8: 0xB8, # CEDILLA + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CD: 0xCD, # LATIN CAPITAL 
LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x010E: 0xCF, # LATIN CAPITAL LETTER D WITH CARON + 0x010F: 0xEF, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK + 0x011A: 0xCC, # LATIN 
CAPITAL LETTER E WITH CARON + 0x011B: 0xEC, # LATIN SMALL LETTER E WITH CARON + 0x0139: 0xC5, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013A: 0xE5, # LATIN SMALL LETTER L WITH ACUTE + 0x013D: 0xA5, # LATIN CAPITAL LETTER L WITH CARON + 0x013E: 0xB5, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0xD2, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0xF2, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0xC0, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0xE0, # LATIN SMALL LETTER R WITH ACUTE + 0x0158: 0xD8, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0xF8, # LATIN SMALL LETTER R WITH CARON + 0x015A: 0xA6, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0xB6, # LATIN SMALL LETTER S WITH ACUTE + 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0xDE, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0xFE, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0xAB, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0xBB, # LATIN SMALL LETTER T WITH CARON + 0x016E: 0xD9, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016F: 0xF9, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0xDB, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0xFB, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0xAC, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0xBC, # LATIN SMALL LETTER Z WITH ACUTE + 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON + 0x02C7: 0xB7, # CARON + 
0x02D8: 0xA2, # BREVE + 0x02D9: 0xFF, # DOT ABOVE + 0x02DB: 0xB2, # OGONEK + 0x02DD: 0xBD, # DOUBLE ACUTE ACCENT } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_3.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_3.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_3.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,515 +32,514 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' 
# 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - 
u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0126' # 0xA1 -> 
LATIN CAPITAL LETTER H WITH STROKE - u'\u02d8' # 0xA2 -> BREVE - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0126' # 0xA1 -> LATIN CAPITAL LETTER H WITH STROKE + u'\u02d8' # 0xA2 -> BREVE + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN u'\ufffe' - u'\u0124' # 0xA6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\u0130' # 0xA9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA - 
u'\u011e' # 0xAB -> LATIN CAPITAL LETTER G WITH BREVE - u'\u0134' # 0xAC -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX - u'\xad' # 0xAD -> SOFT HYPHEN + u'\u0124' # 0xA6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\u0130' # 0xA9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u011e' # 0xAB -> LATIN CAPITAL LETTER G WITH BREVE + u'\u0134' # 0xAC -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX + u'\xad' # 0xAD -> SOFT HYPHEN u'\ufffe' - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0127' # 0xB1 -> LATIN SMALL LETTER H WITH STROKE - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u0125' # 0xB6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\u0131' # 0xB9 -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA - u'\u011f' # 0xBB -> LATIN SMALL LETTER G WITH BREVE - u'\u0135' # 0xBC -> LATIN SMALL LETTER J WITH CIRCUMFLEX - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0127' # 0xB1 -> LATIN SMALL LETTER H WITH STROKE + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u0125' # 0xB6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\u0131' # 0xB9 -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA + u'\u011f' # 0xBB -> LATIN SMALL LETTER G WITH BREVE + u'\u0135' # 0xBC -> LATIN SMALL LETTER J WITH CIRCUMFLEX + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF u'\ufffe' - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH 
GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\ufffe' - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u010a' # 0xC5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE - u'\u0108' # 0xC6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u010a' # 0xC5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE + u'\u0108' # 0xC6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\ufffe' - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0120' # 0xD5 -> LATIN CAPITAL LETTER G 
WITH DOT ABOVE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u011c' # 0xD8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u016c' # 0xDD -> LATIN CAPITAL LETTER U WITH BREVE - u'\u015c' # 0xDE -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0120' # 0xD5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u011c' # 0xD8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u016c' # 0xDD -> LATIN CAPITAL LETTER U WITH BREVE + u'\u015c' # 0xDE -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\ufffe' - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u010b' # 0xE5 -> LATIN SMALL LETTER C WITH DOT ABOVE - u'\u0109' # 0xE6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH 
GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u010b' # 0xE5 -> LATIN SMALL LETTER C WITH DOT ABOVE + u'\u0109' # 0xE6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS u'\ufffe' - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0121' # 0xF5 -> LATIN SMALL LETTER G WITH DOT ABOVE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u011d' # 0xF8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u016d' # 0xFD -> LATIN SMALL LETTER U WITH BREVE - u'\u015d' # 0xFE -> LATIN SMALL LETTER S WITH CIRCUMFLEX - u'\u02d9' # 0xFF -> DOT ABOVE + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + 
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0121' # 0xF5 -> LATIN SMALL LETTER G WITH DOT ABOVE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u011d' # 0xF8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u016d' # 0xFD -> LATIN SMALL LETTER U WITH BREVE + u'\u015d' # 0xFE -> LATIN SMALL LETTER S WITH CIRCUMFLEX + u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT 
PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 
0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 
0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0108: 0xC6, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX - 0x0109: 0xE6, # LATIN SMALL LETTER C WITH CIRCUMFLEX - 0x010A: 0xC5, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x010B: 0xE5, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x011C: 0xD8, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX - 0x011D: 0xF8, # LATIN SMALL LETTER G WITH CIRCUMFLEX - 0x011E: 0xAB, # LATIN CAPITAL LETTER G WITH BREVE - 0x011F: 0xBB, # LATIN SMALL LETTER G WITH BREVE - 0x0120: 0xD5, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x0121: 0xF5, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x0124: 0xA6, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX - 0x0125: 0xB6, # LATIN SMALL LETTER H WITH CIRCUMFLEX - 0x0126: 0xA1, # LATIN CAPITAL LETTER H WITH STROKE - 0x0127: 0xB1, # LATIN SMALL LETTER H WITH STROKE - 0x0130: 0xA9, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xB9, # LATIN SMALL LETTER DOTLESS I - 0x0134: 0xAC, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX - 0x0135: 0xBC, # LATIN SMALL LETTER J WITH CIRCUMFLEX - 0x015C: 0xDE, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX - 0x015D: 0xFE, # LATIN SMALL LETTER S WITH CIRCUMFLEX - 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA - 0x016C: 0xDD, # LATIN CAPITAL LETTER U WITH BREVE - 0x016D: 0xFD, # LATIN SMALL LETTER U WITH BREVE - 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xBF, # LATIN SMALL LETTER Z 
WITH DOT ABOVE - 0x02D8: 0xA2, # BREVE - 0x02D9: 0xFF, # DOT ABOVE + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 
0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 
0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 
0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U 
WITH DIAERESIS + 0x0108: 0xC6, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX + 0x0109: 0xE6, # LATIN SMALL LETTER C WITH CIRCUMFLEX + 0x010A: 0xC5, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x010B: 0xE5, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x011C: 0xD8, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX + 0x011D: 0xF8, # LATIN SMALL LETTER G WITH CIRCUMFLEX + 0x011E: 0xAB, # LATIN CAPITAL LETTER G WITH BREVE + 0x011F: 0xBB, # LATIN SMALL LETTER G WITH BREVE + 0x0120: 0xD5, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x0121: 0xF5, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x0124: 0xA6, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX + 0x0125: 0xB6, # LATIN SMALL LETTER H WITH CIRCUMFLEX + 0x0126: 0xA1, # LATIN CAPITAL LETTER H WITH STROKE + 0x0127: 0xB1, # LATIN SMALL LETTER H WITH STROKE + 0x0130: 0xA9, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0xB9, # LATIN SMALL LETTER DOTLESS I + 0x0134: 0xAC, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX + 0x0135: 0xBC, # LATIN SMALL LETTER J WITH CIRCUMFLEX + 0x015C: 0xDE, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX + 0x015D: 0xFE, # LATIN SMALL LETTER S WITH CIRCUMFLEX + 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA + 0x016C: 0xDD, # LATIN CAPITAL LETTER U WITH BREVE + 0x016D: 0xFD, # LATIN SMALL LETTER U WITH BREVE + 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x02D8: 0xA2, # BREVE + 0x02D9: 0xFF, # DOT ABOVE } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_4.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_4.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_4.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - 
u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0138' # 0xA2 -> LATIN SMALL LETTER KRA - u'\u0156' # 0xA3 -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE - u'\u013b' # 0xA6 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0112' # 0xAA -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0122' # 0xAB -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0166' # 0xAC -> LATIN CAPITAL LETTER T WITH STROKE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK - u'\u02db' # 0xB2 -> OGONEK - u'\u0157' # 0xB3 -> LATIN SMALL LETTER R WITH 
CEDILLA - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE - u'\u013c' # 0xB6 -> LATIN SMALL LETTER L WITH CEDILLA - u'\u02c7' # 0xB7 -> CARON - u'\xb8' # 0xB8 -> CEDILLA - u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON - u'\u0113' # 0xBA -> LATIN SMALL LETTER E WITH MACRON - u'\u0123' # 0xBB -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0167' # 0xBC -> LATIN SMALL LETTER T WITH STROKE - u'\u014a' # 0xBD -> LATIN CAPITAL LETTER ENG - u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON - u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG - u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u012a' # 0xCF -> LATIN CAPITAL LETTER I WITH MACRON - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON - u'\u0136' # 0xD3 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\u0172' # 0xD9 -> LATIN 
CAPITAL LETTER U WITH OGONEK - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0168' # 0xDD -> LATIN CAPITAL LETTER U WITH TILDE - u'\u016a' # 0xDE -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u012b' # 0xEF -> LATIN SMALL LETTER I WITH MACRON - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON - u'\u0137' # 0xF3 -> LATIN SMALL LETTER K WITH CEDILLA - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0169' # 0xFD -> LATIN SMALL LETTER U WITH TILDE - u'\u016b' # 0xFE -> 
LATIN SMALL LETTER U WITH MACRON - u'\u02d9' # 0xFF -> DOT ABOVE + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0138' # 0xA2 -> LATIN SMALL LETTER KRA + u'\u0156' # 0xA3 -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE + u'\u013b' # 0xA6 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\u0160' # 0xA9 -> LATIN CAPITAL 
LETTER S WITH CARON + u'\u0112' # 0xAA -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0122' # 0xAB -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0166' # 0xAC -> LATIN CAPITAL LETTER T WITH STROKE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u02db' # 0xB2 -> OGONEK + u'\u0157' # 0xB3 -> LATIN SMALL LETTER R WITH CEDILLA + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE + u'\u013c' # 0xB6 -> LATIN SMALL LETTER L WITH CEDILLA + u'\u02c7' # 0xB7 -> CARON + u'\xb8' # 0xB8 -> CEDILLA + u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON + u'\u0113' # 0xBA -> LATIN SMALL LETTER E WITH MACRON + u'\u0123' # 0xBB -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0167' # 0xBC -> LATIN SMALL LETTER T WITH STROKE + u'\u014a' # 0xBD -> LATIN CAPITAL LETTER ENG + u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON + u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG + u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u012a' # 0xCF -> LATIN CAPITAL LETTER I WITH MACRON + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + 
u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\u0136' # 0xD3 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0168' # 0xDD -> LATIN CAPITAL LETTER U WITH TILDE + u'\u016a' # 0xDE -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u012b' # 0xEF -> LATIN SMALL LETTER I WITH MACRON + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON + u'\u0137' # 0xF3 -> LATIN SMALL LETTER K WITH CEDILLA + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN 
SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0169' # 0xFD -> LATIN SMALL LETTER U WITH TILDE + u'\u016b' # 0xFE -> LATIN SMALL LETTER U WITH MACRON + u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 
0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN 
SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B8: 0xB8, # CEDILLA - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # 
LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0xC0, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xE0, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK - 
0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE - 0x0112: 0xAA, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xBA, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xCC, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xEC, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xAB, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xBB, # LATIN SMALL LETTER G WITH CEDILLA - 0x0128: 0xA5, # LATIN CAPITAL LETTER I WITH TILDE - 0x0129: 0xB5, # LATIN SMALL LETTER I WITH TILDE - 0x012A: 0xCF, # LATIN CAPITAL LETTER I WITH MACRON - 0x012B: 0xEF, # LATIN SMALL LETTER I WITH MACRON - 0x012E: 0xC7, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012F: 0xE7, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xD3, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xF3, # LATIN SMALL LETTER K WITH CEDILLA - 0x0138: 0xA2, # LATIN SMALL LETTER KRA - 0x013B: 0xA6, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013C: 0xB6, # LATIN SMALL LETTER L WITH CEDILLA - 0x0145: 0xD1, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xF1, # LATIN SMALL LETTER N WITH CEDILLA - 0x014A: 0xBD, # LATIN CAPITAL LETTER ENG - 0x014B: 0xBF, # LATIN SMALL LETTER ENG - 0x014C: 0xD2, # LATIN CAPITAL LETTER O WITH MACRON - 0x014D: 0xF2, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0xA3, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xB3, # LATIN SMALL LETTER R WITH CEDILLA - 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON - 0x0166: 0xAC, # LATIN CAPITAL LETTER T WITH STROKE - 0x0167: 0xBC, # LATIN SMALL LETTER T WITH STROKE - 0x0168: 0xDD, # LATIN CAPITAL LETTER U WITH TILDE - 0x0169: 0xFD, # LATIN SMALL LETTER U WITH TILDE - 0x016A: 0xDE, # LATIN CAPITAL LETTER U WITH MACRON - 0x016B: 0xFE, # LATIN SMALL LETTER U WITH 
MACRON - 0x0172: 0xD9, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xF9, # LATIN SMALL LETTER U WITH OGONEK - 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON - 0x02C7: 0xB7, # CARON - 0x02D9: 0xFF, # DOT ABOVE - 0x02DB: 0xB2, # OGONEK + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT 
SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL 
LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B8: 0xB8, # CEDILLA + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CB: 0xCB, # LATIN CAPITAL LETTER 
E WITH DIAERESIS + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0xC0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0xE0, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0112: 0xAA, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0xBA, # LATIN 
SMALL LETTER E WITH MACRON + 0x0116: 0xCC, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0xEC, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0xAB, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0xBB, # LATIN SMALL LETTER G WITH CEDILLA + 0x0128: 0xA5, # LATIN CAPITAL LETTER I WITH TILDE + 0x0129: 0xB5, # LATIN SMALL LETTER I WITH TILDE + 0x012A: 0xCF, # LATIN CAPITAL LETTER I WITH MACRON + 0x012B: 0xEF, # LATIN SMALL LETTER I WITH MACRON + 0x012E: 0xC7, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012F: 0xE7, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0xD3, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0xF3, # LATIN SMALL LETTER K WITH CEDILLA + 0x0138: 0xA2, # LATIN SMALL LETTER KRA + 0x013B: 0xA6, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013C: 0xB6, # LATIN SMALL LETTER L WITH CEDILLA + 0x0145: 0xD1, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0xF1, # LATIN SMALL LETTER N WITH CEDILLA + 0x014A: 0xBD, # LATIN CAPITAL LETTER ENG + 0x014B: 0xBF, # LATIN SMALL LETTER ENG + 0x014C: 0xD2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014D: 0xF2, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0xA3, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0xB3, # LATIN SMALL LETTER R WITH CEDILLA + 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON + 0x0166: 0xAC, # LATIN CAPITAL LETTER T WITH STROKE + 0x0167: 0xBC, # LATIN SMALL LETTER T WITH STROKE + 0x0168: 0xDD, # LATIN CAPITAL LETTER U WITH TILDE + 0x0169: 0xFD, # LATIN SMALL LETTER U WITH TILDE + 0x016A: 0xDE, # LATIN CAPITAL LETTER U WITH MACRON + 0x016B: 0xFE, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0xD9, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xF9, # LATIN SMALL LETTER U WITH OGONEK + 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON + 0x02C7: 0xB7, # CARON + 0x02D9: 0xFF, # DOT ABOVE + 0x02DB: 0xB2, # 
OGONEK } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_5.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_5.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_5.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' 
# 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - 
u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0401' # 0xA1 -> 
CYRILLIC CAPITAL LETTER IO - u'\u0402' # 0xA2 -> CYRILLIC CAPITAL LETTER DJE - u'\u0403' # 0xA3 -> CYRILLIC CAPITAL LETTER GJE - u'\u0404' # 0xA4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0405' # 0xA5 -> CYRILLIC CAPITAL LETTER DZE - u'\u0406' # 0xA6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0407' # 0xA7 -> CYRILLIC CAPITAL LETTER YI - u'\u0408' # 0xA8 -> CYRILLIC CAPITAL LETTER JE - u'\u0409' # 0xA9 -> CYRILLIC CAPITAL LETTER LJE - u'\u040a' # 0xAA -> CYRILLIC CAPITAL LETTER NJE - u'\u040b' # 0xAB -> CYRILLIC CAPITAL LETTER TSHE - u'\u040c' # 0xAC -> CYRILLIC CAPITAL LETTER KJE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u040e' # 0xAE -> CYRILLIC CAPITAL LETTER SHORT U - u'\u040f' # 0xAF -> CYRILLIC CAPITAL LETTER DZHE - u'\u0410' # 0xB0 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xB1 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0xB2 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0xB3 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0xB4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xB5 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0xB6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0xB7 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0xB8 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xB9 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xBA -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xBB -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xBC -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xBD -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xBE -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xBF -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0xC0 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xC1 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xC2 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xC3 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0xC4 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0xC5 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0xC6 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0xC7 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0xC8 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0xC9 -> 
CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0xCA -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0xCB -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0xCC -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0xCD -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0xCE -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0xCF -> CYRILLIC CAPITAL LETTER YA - u'\u0430' # 0xD0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xD1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xD2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0xD3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0xD4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xD5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xD7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xD8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xD9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xDA -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xDB -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xDC -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xDD -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xDE -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xDF -> CYRILLIC SMALL LETTER PE - u'\u0440' # 0xE0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xE1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xE2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xE3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xE4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xE5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0xE6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xE7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xE8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xE9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xEA -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0xEB -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xEC -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xED -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xEE -> CYRILLIC SMALL LETTER YU - u'\u044f' # 0xEF -> CYRILLIC SMALL LETTER YA - u'\u2116' # 0xF0 -> NUMERO SIGN - u'\u0451' # 0xF1 -> CYRILLIC SMALL LETTER IO - u'\u0452' # 0xF2 -> CYRILLIC SMALL LETTER DJE - 
u'\u0453' # 0xF3 -> CYRILLIC SMALL LETTER GJE - u'\u0454' # 0xF4 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0455' # 0xF5 -> CYRILLIC SMALL LETTER DZE - u'\u0456' # 0xF6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0457' # 0xF7 -> CYRILLIC SMALL LETTER YI - u'\u0458' # 0xF8 -> CYRILLIC SMALL LETTER JE - u'\u0459' # 0xF9 -> CYRILLIC SMALL LETTER LJE - u'\u045a' # 0xFA -> CYRILLIC SMALL LETTER NJE - u'\u045b' # 0xFB -> CYRILLIC SMALL LETTER TSHE - u'\u045c' # 0xFC -> CYRILLIC SMALL LETTER KJE - u'\xa7' # 0xFD -> SECTION SIGN - u'\u045e' # 0xFE -> CYRILLIC SMALL LETTER SHORT U - u'\u045f' # 0xFF -> CYRILLIC SMALL LETTER DZHE + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' 
# 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + 
u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0401' # 0xA1 -> 
CYRILLIC CAPITAL LETTER IO + u'\u0402' # 0xA2 -> CYRILLIC CAPITAL LETTER DJE + u'\u0403' # 0xA3 -> CYRILLIC CAPITAL LETTER GJE + u'\u0404' # 0xA4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0405' # 0xA5 -> CYRILLIC CAPITAL LETTER DZE + u'\u0406' # 0xA6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0407' # 0xA7 -> CYRILLIC CAPITAL LETTER YI + u'\u0408' # 0xA8 -> CYRILLIC CAPITAL LETTER JE + u'\u0409' # 0xA9 -> CYRILLIC CAPITAL LETTER LJE + u'\u040a' # 0xAA -> CYRILLIC CAPITAL LETTER NJE + u'\u040b' # 0xAB -> CYRILLIC CAPITAL LETTER TSHE + u'\u040c' # 0xAC -> CYRILLIC CAPITAL LETTER KJE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u040e' # 0xAE -> CYRILLIC CAPITAL LETTER SHORT U + u'\u040f' # 0xAF -> CYRILLIC CAPITAL LETTER DZHE + u'\u0410' # 0xB0 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xB1 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0xB2 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0xB3 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0xB4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xB5 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0xB6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0xB7 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0xB8 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xB9 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xBA -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xBB -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xBC -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xBD -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xBE -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xBF -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0xC0 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xC1 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xC2 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xC3 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0xC4 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0xC5 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0xC6 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0xC7 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0xC8 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0xC9 -> 
CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0xCA -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0xCB -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0xCC -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0xCD -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0xCE -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0xCF -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0xD0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xD1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0xD2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0xD3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0xD4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xD5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0xD7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0xD8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xD9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xDA -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xDB -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xDC -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xDD -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xDE -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xDF -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0xE0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xE1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xE2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xE3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0xE4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0xE5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0xE6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0xE7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0xE8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0xE9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0xEA -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0xEB -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0xEC -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0xED -> CYRILLIC SMALL LETTER E + u'\u044e' # 0xEE -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0xEF -> CYRILLIC SMALL LETTER YA + u'\u2116' # 0xF0 -> NUMERO SIGN + u'\u0451' # 0xF1 -> CYRILLIC SMALL LETTER IO + u'\u0452' # 0xF2 -> CYRILLIC SMALL LETTER DJE + 
u'\u0453' # 0xF3 -> CYRILLIC SMALL LETTER GJE + u'\u0454' # 0xF4 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0455' # 0xF5 -> CYRILLIC SMALL LETTER DZE + u'\u0456' # 0xF6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0xF7 -> CYRILLIC SMALL LETTER YI + u'\u0458' # 0xF8 -> CYRILLIC SMALL LETTER JE + u'\u0459' # 0xF9 -> CYRILLIC SMALL LETTER LJE + u'\u045a' # 0xFA -> CYRILLIC SMALL LETTER NJE + u'\u045b' # 0xFB -> CYRILLIC SMALL LETTER TSHE + u'\u045c' # 0xFC -> CYRILLIC SMALL LETTER KJE + u'\xa7' # 0xFD -> SECTION SIGN + u'\u045e' # 0xFE -> CYRILLIC SMALL LETTER SHORT U + u'\u045f' # 0xFF -> CYRILLIC SMALL LETTER DZHE ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 
0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL 
LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A7: 0xFD, # SECTION SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x0401: 0xA1, # CYRILLIC CAPITAL LETTER IO - 0x0402: 0xA2, # CYRILLIC CAPITAL LETTER DJE - 0x0403: 0xA3, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0xA4, # CYRILLIC CAPITAL LETTER UKRAINIAN 
IE - 0x0405: 0xA5, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0xA6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xA7, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0xA8, # CYRILLIC CAPITAL LETTER JE - 0x0409: 0xA9, # CYRILLIC CAPITAL LETTER LJE - 0x040A: 0xAA, # CYRILLIC CAPITAL LETTER NJE - 0x040B: 0xAB, # CYRILLIC CAPITAL LETTER TSHE - 0x040C: 0xAC, # CYRILLIC CAPITAL LETTER KJE - 0x040E: 0xAE, # CYRILLIC CAPITAL LETTER SHORT U - 0x040F: 0xAF, # CYRILLIC CAPITAL LETTER DZHE - 0x0410: 0xB0, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xB1, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0xB2, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xB3, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xB4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xB5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xB6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xB7, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xB8, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xB9, # CYRILLIC CAPITAL LETTER SHORT I - 0x041A: 0xBA, # CYRILLIC CAPITAL LETTER KA - 0x041B: 0xBB, # CYRILLIC CAPITAL LETTER EL - 0x041C: 0xBC, # CYRILLIC CAPITAL LETTER EM - 0x041D: 0xBD, # CYRILLIC CAPITAL LETTER EN - 0x041E: 0xBE, # CYRILLIC CAPITAL LETTER O - 0x041F: 0xBF, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xC0, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xC1, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xC2, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xC3, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xC4, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xC5, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xC6, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xC7, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xC8, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xC9, # CYRILLIC CAPITAL LETTER SHCHA - 0x042A: 0xCA, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042B: 0xCB, # CYRILLIC CAPITAL LETTER YERU - 0x042C: 0xCC, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042D: 0xCD, # CYRILLIC CAPITAL LETTER E - 0x042E: 0xCE, # CYRILLIC CAPITAL LETTER YU - 0x042F: 0xCF, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xD0, # CYRILLIC SMALL LETTER A - 
0x0431: 0xD1, # CYRILLIC SMALL LETTER BE - 0x0432: 0xD2, # CYRILLIC SMALL LETTER VE - 0x0433: 0xD3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xD4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xD5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xD7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xD8, # CYRILLIC SMALL LETTER I - 0x0439: 0xD9, # CYRILLIC SMALL LETTER SHORT I - 0x043A: 0xDA, # CYRILLIC SMALL LETTER KA - 0x043B: 0xDB, # CYRILLIC SMALL LETTER EL - 0x043C: 0xDC, # CYRILLIC SMALL LETTER EM - 0x043D: 0xDD, # CYRILLIC SMALL LETTER EN - 0x043E: 0xDE, # CYRILLIC SMALL LETTER O - 0x043F: 0xDF, # CYRILLIC SMALL LETTER PE - 0x0440: 0xE0, # CYRILLIC SMALL LETTER ER - 0x0441: 0xE1, # CYRILLIC SMALL LETTER ES - 0x0442: 0xE2, # CYRILLIC SMALL LETTER TE - 0x0443: 0xE3, # CYRILLIC SMALL LETTER U - 0x0444: 0xE4, # CYRILLIC SMALL LETTER EF - 0x0445: 0xE5, # CYRILLIC SMALL LETTER HA - 0x0446: 0xE6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xE7, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xE8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xE9, # CYRILLIC SMALL LETTER SHCHA - 0x044A: 0xEA, # CYRILLIC SMALL LETTER HARD SIGN - 0x044B: 0xEB, # CYRILLIC SMALL LETTER YERU - 0x044C: 0xEC, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044D: 0xED, # CYRILLIC SMALL LETTER E - 0x044E: 0xEE, # CYRILLIC SMALL LETTER YU - 0x044F: 0xEF, # CYRILLIC SMALL LETTER YA - 0x0451: 0xF1, # CYRILLIC SMALL LETTER IO - 0x0452: 0xF2, # CYRILLIC SMALL LETTER DJE - 0x0453: 0xF3, # CYRILLIC SMALL LETTER GJE - 0x0454: 0xF4, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0xF5, # CYRILLIC SMALL LETTER DZE - 0x0456: 0xF6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xF7, # CYRILLIC SMALL LETTER YI - 0x0458: 0xF8, # CYRILLIC SMALL LETTER JE - 0x0459: 0xF9, # CYRILLIC SMALL LETTER LJE - 0x045A: 0xFA, # CYRILLIC SMALL LETTER NJE - 0x045B: 0xFB, # CYRILLIC SMALL LETTER TSHE - 0x045C: 0xFC, # CYRILLIC SMALL LETTER KJE - 0x045E: 0xFE, # CYRILLIC SMALL LETTER SHORT U - 0x045F: 0xFF, # CYRILLIC SMALL LETTER 
DZHE - 0x2116: 0xF0, # NUMERO SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 
0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 
0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A7: 0xFD, # SECTION SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x0401: 0xA1, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0xA2, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0xA3, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0xA4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0xA5, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0xA6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0xA7, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0xA8, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0xA9, # CYRILLIC CAPITAL LETTER LJE + 0x040A: 0xAA, # CYRILLIC CAPITAL LETTER NJE + 0x040B: 0xAB, # CYRILLIC CAPITAL LETTER TSHE + 0x040C: 0xAC, # CYRILLIC CAPITAL LETTER KJE + 0x040E: 0xAE, # CYRILLIC CAPITAL LETTER SHORT U + 0x040F: 0xAF, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0xB0, # CYRILLIC CAPITAL LETTER A + 0x0411: 0xB1, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0xB2, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0xB3, # 
CYRILLIC CAPITAL LETTER GHE + 0x0414: 0xB4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0xB5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0xB6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0xB7, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0xB8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0xB9, # CYRILLIC CAPITAL LETTER SHORT I + 0x041A: 0xBA, # CYRILLIC CAPITAL LETTER KA + 0x041B: 0xBB, # CYRILLIC CAPITAL LETTER EL + 0x041C: 0xBC, # CYRILLIC CAPITAL LETTER EM + 0x041D: 0xBD, # CYRILLIC CAPITAL LETTER EN + 0x041E: 0xBE, # CYRILLIC CAPITAL LETTER O + 0x041F: 0xBF, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0xC0, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0xC1, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0xC2, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0xC3, # CYRILLIC CAPITAL LETTER U + 0x0424: 0xC4, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0xC5, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0xC6, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0xC7, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0xC8, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0xC9, # CYRILLIC CAPITAL LETTER SHCHA + 0x042A: 0xCA, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042B: 0xCB, # CYRILLIC CAPITAL LETTER YERU + 0x042C: 0xCC, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042D: 0xCD, # CYRILLIC CAPITAL LETTER E + 0x042E: 0xCE, # CYRILLIC CAPITAL LETTER YU + 0x042F: 0xCF, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0xD0, # CYRILLIC SMALL LETTER A + 0x0431: 0xD1, # CYRILLIC SMALL LETTER BE + 0x0432: 0xD2, # CYRILLIC SMALL LETTER VE + 0x0433: 0xD3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0xD4, # CYRILLIC SMALL LETTER DE + 0x0435: 0xD5, # CYRILLIC SMALL LETTER IE + 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0xD7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0xD8, # CYRILLIC SMALL LETTER I + 0x0439: 0xD9, # CYRILLIC SMALL LETTER SHORT I + 0x043A: 0xDA, # CYRILLIC SMALL LETTER KA + 0x043B: 0xDB, # CYRILLIC SMALL LETTER EL + 0x043C: 0xDC, # CYRILLIC SMALL LETTER EM + 0x043D: 0xDD, # CYRILLIC SMALL LETTER EN + 0x043E: 0xDE, # CYRILLIC SMALL LETTER O + 0x043F: 0xDF, # CYRILLIC SMALL 
LETTER PE + 0x0440: 0xE0, # CYRILLIC SMALL LETTER ER + 0x0441: 0xE1, # CYRILLIC SMALL LETTER ES + 0x0442: 0xE2, # CYRILLIC SMALL LETTER TE + 0x0443: 0xE3, # CYRILLIC SMALL LETTER U + 0x0444: 0xE4, # CYRILLIC SMALL LETTER EF + 0x0445: 0xE5, # CYRILLIC SMALL LETTER HA + 0x0446: 0xE6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0xE7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0xE8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0xE9, # CYRILLIC SMALL LETTER SHCHA + 0x044A: 0xEA, # CYRILLIC SMALL LETTER HARD SIGN + 0x044B: 0xEB, # CYRILLIC SMALL LETTER YERU + 0x044C: 0xEC, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044D: 0xED, # CYRILLIC SMALL LETTER E + 0x044E: 0xEE, # CYRILLIC SMALL LETTER YU + 0x044F: 0xEF, # CYRILLIC SMALL LETTER YA + 0x0451: 0xF1, # CYRILLIC SMALL LETTER IO + 0x0452: 0xF2, # CYRILLIC SMALL LETTER DJE + 0x0453: 0xF3, # CYRILLIC SMALL LETTER GJE + 0x0454: 0xF4, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0xF5, # CYRILLIC SMALL LETTER DZE + 0x0456: 0xF6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0xF7, # CYRILLIC SMALL LETTER YI + 0x0458: 0xF8, # CYRILLIC SMALL LETTER JE + 0x0459: 0xF9, # CYRILLIC SMALL LETTER LJE + 0x045A: 0xFA, # CYRILLIC SMALL LETTER NJE + 0x045B: 0xFB, # CYRILLIC SMALL LETTER TSHE + 0x045C: 0xFC, # CYRILLIC SMALL LETTER KJE + 0x045E: 0xFE, # CYRILLIC SMALL LETTER SHORT U + 0x045F: 0xFF, # CYRILLIC SMALL LETTER DZHE + 0x2116: 0xF0, # NUMERO SIGN } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_6.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_6.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_6.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,171 +32,171 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> 
START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END 
OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\xa4' # 0xA4 -> CURRENCY SIGN u'\ufffe' u'\ufffe' u'\ufffe' @@ -204,8 +204,8 @@ u'\ufffe' u'\ufffe' u'\ufffe' - u'\u060c' # 0xAC -> ARABIC COMMA - u'\xad' # 0xAD -> SOFT HYPHEN + u'\u060c' # 0xAC -> ARABIC COMMA + u'\xad' # 0xAD -> SOFT HYPHEN u'\ufffe' u'\ufffe' u'\ufffe' @@ -219,62 +219,62 @@ u'\ufffe' u'\ufffe' u'\ufffe' - u'\u061b' # 0xBB -> ARABIC SEMICOLON - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u061f' # 0xBF -> ARABIC QUESTION MARK - u'\ufffe' - u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xC7 -> ARABIC LETTER ALEF - u'\u0628' # 
0xC8 -> ARABIC LETTER BEH - u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xCA -> ARABIC LETTER TEH - u'\u062b' # 0xCB -> ARABIC LETTER THEH - u'\u062c' # 0xCC -> ARABIC LETTER JEEM - u'\u062d' # 0xCD -> ARABIC LETTER HAH - u'\u062e' # 0xCE -> ARABIC LETTER KHAH - u'\u062f' # 0xCF -> ARABIC LETTER DAL - u'\u0630' # 0xD0 -> ARABIC LETTER THAL - u'\u0631' # 0xD1 -> ARABIC LETTER REH - u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xD3 -> ARABIC LETTER SEEN - u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xD5 -> ARABIC LETTER SAD - u'\u0636' # 0xD6 -> ARABIC LETTER DAD - u'\u0637' # 0xD7 -> ARABIC LETTER TAH - u'\u0638' # 0xD8 -> ARABIC LETTER ZAH - u'\u0639' # 0xD9 -> ARABIC LETTER AIN - u'\u063a' # 0xDA -> ARABIC LETTER GHAIN + u'\u061b' # 0xBB -> ARABIC SEMICOLON + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u061f' # 0xBF -> ARABIC QUESTION MARK + u'\ufffe' + u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA + u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0xC7 -> ARABIC LETTER ALEF + u'\u0628' # 0xC8 -> ARABIC LETTER BEH + u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0xCA -> ARABIC LETTER TEH + u'\u062b' # 0xCB -> ARABIC LETTER THEH + u'\u062c' # 0xCC -> ARABIC LETTER JEEM + u'\u062d' # 0xCD -> ARABIC LETTER HAH + u'\u062e' # 0xCE -> ARABIC LETTER KHAH + u'\u062f' # 0xCF -> ARABIC LETTER DAL + u'\u0630' # 0xD0 -> ARABIC LETTER THAL + u'\u0631' # 0xD1 -> ARABIC LETTER REH + u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN + u'\u0633' # 0xD3 -> ARABIC LETTER SEEN + u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN + u'\u0635' # 0xD5 -> ARABIC LETTER SAD + u'\u0636' # 0xD6 -> ARABIC LETTER DAD + u'\u0637' # 0xD7 -> ARABIC LETTER TAH + u'\u0638' # 0xD8 -> ARABIC LETTER ZAH + u'\u0639' # 0xD9 -> ARABIC LETTER 
AIN + u'\u063a' # 0xDA -> ARABIC LETTER GHAIN u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' - u'\u0640' # 0xE0 -> ARABIC TATWEEL - u'\u0641' # 0xE1 -> ARABIC LETTER FEH - u'\u0642' # 0xE2 -> ARABIC LETTER QAF - u'\u0643' # 0xE3 -> ARABIC LETTER KAF - u'\u0644' # 0xE4 -> ARABIC LETTER LAM - u'\u0645' # 0xE5 -> ARABIC LETTER MEEM - u'\u0646' # 0xE6 -> ARABIC LETTER NOON - u'\u0647' # 0xE7 -> ARABIC LETTER HEH - u'\u0648' # 0xE8 -> ARABIC LETTER WAW - u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xEA -> ARABIC LETTER YEH - u'\u064b' # 0xEB -> ARABIC FATHATAN - u'\u064c' # 0xEC -> ARABIC DAMMATAN - u'\u064d' # 0xED -> ARABIC KASRATAN - u'\u064e' # 0xEE -> ARABIC FATHA - u'\u064f' # 0xEF -> ARABIC DAMMA - u'\u0650' # 0xF0 -> ARABIC KASRA - u'\u0651' # 0xF1 -> ARABIC SHADDA - u'\u0652' # 0xF2 -> ARABIC SUKUN + u'\u0640' # 0xE0 -> ARABIC TATWEEL + u'\u0641' # 0xE1 -> ARABIC LETTER FEH + u'\u0642' # 0xE2 -> ARABIC LETTER QAF + u'\u0643' # 0xE3 -> ARABIC LETTER KAF + u'\u0644' # 0xE4 -> ARABIC LETTER LAM + u'\u0645' # 0xE5 -> ARABIC LETTER MEEM + u'\u0646' # 0xE6 -> ARABIC LETTER NOON + u'\u0647' # 0xE7 -> ARABIC LETTER HEH + u'\u0648' # 0xE8 -> ARABIC LETTER WAW + u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0xEA -> ARABIC LETTER YEH + u'\u064b' # 0xEB -> ARABIC FATHATAN + u'\u064c' # 0xEC -> ARABIC DAMMATAN + u'\u064d' # 0xED -> ARABIC KASRATAN + u'\u064e' # 0xEE -> ARABIC FATHA + u'\u064f' # 0xEF -> ARABIC DAMMA + u'\u0650' # 0xF0 -> ARABIC KASRA + u'\u0651' # 0xF1 -> ARABIC SHADDA + u'\u0652' # 0xF2 -> ARABIC SUKUN u'\ufffe' u'\ufffe' u'\ufffe' @@ -293,216 +293,215 @@ ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 
0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN 
CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT 
CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x060C: 0xAC, # ARABIC COMMA - 0x061B: 0xBB, # ARABIC SEMICOLON - 0x061F: 0xBF, # ARABIC QUESTION MARK - 0x0621: 0xC1, # ARABIC LETTER HAMZA - 0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0xC7, # ARABIC LETTER ALEF - 0x0628: 0xC8, # ARABIC LETTER BEH - 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA - 0x062A: 0xCA, # ARABIC LETTER TEH - 0x062B: 0xCB, # ARABIC LETTER THEH - 0x062C: 0xCC, # ARABIC LETTER JEEM - 0x062D: 0xCD, # ARABIC LETTER HAH - 0x062E: 0xCE, # ARABIC LETTER KHAH - 0x062F: 0xCF, # ARABIC LETTER DAL - 0x0630: 0xD0, # ARABIC LETTER THAL - 0x0631: 0xD1, # ARABIC LETTER REH - 0x0632: 0xD2, # ARABIC LETTER ZAIN - 0x0633: 0xD3, # ARABIC LETTER SEEN - 0x0634: 0xD4, # ARABIC LETTER SHEEN - 0x0635: 0xD5, # ARABIC LETTER SAD - 0x0636: 0xD6, # ARABIC LETTER DAD - 0x0637: 0xD7, # ARABIC LETTER TAH - 0x0638: 0xD8, # ARABIC LETTER ZAH - 0x0639: 0xD9, # ARABIC LETTER AIN - 0x063A: 0xDA, # ARABIC LETTER GHAIN - 0x0640: 0xE0, # ARABIC TATWEEL - 0x0641: 
0xE1, # ARABIC LETTER FEH - 0x0642: 0xE2, # ARABIC LETTER QAF - 0x0643: 0xE3, # ARABIC LETTER KAF - 0x0644: 0xE4, # ARABIC LETTER LAM - 0x0645: 0xE5, # ARABIC LETTER MEEM - 0x0646: 0xE6, # ARABIC LETTER NOON - 0x0647: 0xE7, # ARABIC LETTER HEH - 0x0648: 0xE8, # ARABIC LETTER WAW - 0x0649: 0xE9, # ARABIC LETTER ALEF MAKSURA - 0x064A: 0xEA, # ARABIC LETTER YEH - 0x064B: 0xEB, # ARABIC FATHATAN - 0x064C: 0xEC, # ARABIC DAMMATAN - 0x064D: 0xED, # ARABIC KASRATAN - 0x064E: 0xEE, # ARABIC FATHA - 0x064F: 0xEF, # ARABIC DAMMA - 0x0650: 0xF0, # ARABIC KASRA - 0x0651: 0xF1, # ARABIC SHADDA - 0x0652: 0xF2, # ARABIC SUKUN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 
0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 
0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x060C: 0xAC, # ARABIC COMMA + 0x061B: 0xBB, # ARABIC SEMICOLON + 0x061F: 0xBF, # ARABIC QUESTION MARK + 0x0621: 0xC1, # ARABIC LETTER HAMZA + 0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA 
ABOVE + 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0xC7, # ARABIC LETTER ALEF + 0x0628: 0xC8, # ARABIC LETTER BEH + 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA + 0x062A: 0xCA, # ARABIC LETTER TEH + 0x062B: 0xCB, # ARABIC LETTER THEH + 0x062C: 0xCC, # ARABIC LETTER JEEM + 0x062D: 0xCD, # ARABIC LETTER HAH + 0x062E: 0xCE, # ARABIC LETTER KHAH + 0x062F: 0xCF, # ARABIC LETTER DAL + 0x0630: 0xD0, # ARABIC LETTER THAL + 0x0631: 0xD1, # ARABIC LETTER REH + 0x0632: 0xD2, # ARABIC LETTER ZAIN + 0x0633: 0xD3, # ARABIC LETTER SEEN + 0x0634: 0xD4, # ARABIC LETTER SHEEN + 0x0635: 0xD5, # ARABIC LETTER SAD + 0x0636: 0xD6, # ARABIC LETTER DAD + 0x0637: 0xD7, # ARABIC LETTER TAH + 0x0638: 0xD8, # ARABIC LETTER ZAH + 0x0639: 0xD9, # ARABIC LETTER AIN + 0x063A: 0xDA, # ARABIC LETTER GHAIN + 0x0640: 0xE0, # ARABIC TATWEEL + 0x0641: 0xE1, # ARABIC LETTER FEH + 0x0642: 0xE2, # ARABIC LETTER QAF + 0x0643: 0xE3, # ARABIC LETTER KAF + 0x0644: 0xE4, # ARABIC LETTER LAM + 0x0645: 0xE5, # ARABIC LETTER MEEM + 0x0646: 0xE6, # ARABIC LETTER NOON + 0x0647: 0xE7, # ARABIC LETTER HEH + 0x0648: 0xE8, # ARABIC LETTER WAW + 0x0649: 0xE9, # ARABIC LETTER ALEF MAKSURA + 0x064A: 0xEA, # ARABIC LETTER YEH + 0x064B: 0xEB, # ARABIC FATHATAN + 0x064C: 0xEC, # ARABIC DAMMATAN + 0x064D: 0xED, # ARABIC KASRATAN + 0x064E: 0xEE, # ARABIC FATHA + 0x064F: 0xEF, # ARABIC DAMMA + 0x0650: 0xF0, # ARABIC KASRA + 0x0651: 0xF1, # ARABIC SHADDA + 0x0652: 0xF2, # ARABIC SUKUN } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_7.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_7.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_7.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class 
StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,519 +32,518 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u2018' # 0xA1 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xA2 -> RIGHT SINGLE QUOTATION MARK - u'\xa3' # 0xA3 -> POUND SIGN - u'\u20ac' # 0xA4 -> EURO SIGN - u'\u20af' # 0xA5 -> DRACHMA SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u037a' # 0xAA -> GREEK YPOGEGRAMMENI - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u2018' # 0xA1 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xA2 -> RIGHT SINGLE QUOTATION MARK + u'\xa3' # 0xA3 -> POUND SIGN + u'\u20ac' # 0xA4 -> EURO SIGN + u'\u20af' # 0xA5 -> DRACHMA SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u037a' # 0xAA -> GREEK YPOGEGRAMMENI + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN u'\ufffe' - u'\u2015' # 0xAF -> HORIZONTAL BAR - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u0384' # 0xB4 -> GREEK TONOS - u'\u0385' # 0xB5 -> GREEK DIALYTIKA TONOS - u'\u0386' # 0xB6 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU - u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU - u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI - u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO + u'\u2015' # 0xAF -> HORIZONTAL BAR + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u0384' # 0xB4 
-> GREEK TONOS + u'\u0385' # 0xB5 -> GREEK DIALYTIKA TONOS + u'\u0386' # 0xB6 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU + u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU + u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI + u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO u'\ufffe' - u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA - u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03ac' # 
0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA - u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA - u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU - u'\u03bd' # 0xED -> GREEK SMALL LETTER NU - u'\u03be' # 0xEE -> GREEK SMALL LETTER XI - u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI - u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO - u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA - u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI - u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA - u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI + 
u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA + u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU + u'\u03bd' # 0xED -> GREEK SMALL LETTER NU + u'\u03be' # 0xEE -> GREEK SMALL LETTER XI + u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI + u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO + u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU + u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI + u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI + u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA + u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS 
u'\ufffe' ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN 
- 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 
0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B7: 0xB7, # MIDDLE DOT - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x037A: 0xAA, # GREEK YPOGEGRAMMENI - 0x0384: 0xB4, # GREEK TONOS - 0x0385: 0xB5, # GREEK DIALYTIKA TONOS - 0x0386: 0xB6, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0xB8, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0xB9, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038A: 0xBA, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038C: 0xBC, # 
GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038E: 0xBE, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038F: 0xBF, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xC0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0xC1, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0xC2, # GREEK CAPITAL LETTER BETA - 0x0393: 0xC3, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0xC4, # GREEK CAPITAL LETTER DELTA - 0x0395: 0xC5, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0xC6, # GREEK CAPITAL LETTER ZETA - 0x0397: 0xC7, # GREEK CAPITAL LETTER ETA - 0x0398: 0xC8, # GREEK CAPITAL LETTER THETA - 0x0399: 0xC9, # GREEK CAPITAL LETTER IOTA - 0x039A: 0xCA, # GREEK CAPITAL LETTER KAPPA - 0x039B: 0xCB, # GREEK CAPITAL LETTER LAMDA - 0x039C: 0xCC, # GREEK CAPITAL LETTER MU - 0x039D: 0xCD, # GREEK CAPITAL LETTER NU - 0x039E: 0xCE, # GREEK CAPITAL LETTER XI - 0x039F: 0xCF, # GREEK CAPITAL LETTER OMICRON - 0x03A0: 0xD0, # GREEK CAPITAL LETTER PI - 0x03A1: 0xD1, # GREEK CAPITAL LETTER RHO - 0x03A3: 0xD3, # GREEK CAPITAL LETTER SIGMA - 0x03A4: 0xD4, # GREEK CAPITAL LETTER TAU - 0x03A5: 0xD5, # GREEK CAPITAL LETTER UPSILON - 0x03A6: 0xD6, # GREEK CAPITAL LETTER PHI - 0x03A7: 0xD7, # GREEK CAPITAL LETTER CHI - 0x03A8: 0xD8, # GREEK CAPITAL LETTER PSI - 0x03A9: 0xD9, # GREEK CAPITAL LETTER OMEGA - 0x03AA: 0xDA, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03AB: 0xDB, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03AC: 0xDC, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03AD: 0xDD, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03AE: 0xDE, # GREEK SMALL LETTER ETA WITH TONOS - 0x03AF: 0xDF, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03B0: 0xE0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA - 0x03B2: 0xE2, # GREEK SMALL LETTER BETA - 0x03B3: 0xE3, # GREEK SMALL LETTER GAMMA - 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA - 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON - 0x03B6: 0xE6, # GREEK SMALL LETTER ZETA - 0x03B7: 0xE7, # GREEK SMALL LETTER ETA - 0x03B8: 
0xE8, # GREEK SMALL LETTER THETA - 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA - 0x03BA: 0xEA, # GREEK SMALL LETTER KAPPA - 0x03BB: 0xEB, # GREEK SMALL LETTER LAMDA - 0x03BC: 0xEC, # GREEK SMALL LETTER MU - 0x03BD: 0xED, # GREEK SMALL LETTER NU - 0x03BE: 0xEE, # GREEK SMALL LETTER XI - 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON - 0x03C0: 0xF0, # GREEK SMALL LETTER PI - 0x03C1: 0xF1, # GREEK SMALL LETTER RHO - 0x03C2: 0xF2, # GREEK SMALL LETTER FINAL SIGMA - 0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA - 0x03C4: 0xF4, # GREEK SMALL LETTER TAU - 0x03C5: 0xF5, # GREEK SMALL LETTER UPSILON - 0x03C6: 0xF6, # GREEK SMALL LETTER PHI - 0x03C7: 0xF7, # GREEK SMALL LETTER CHI - 0x03C8: 0xF8, # GREEK SMALL LETTER PSI - 0x03C9: 0xF9, # GREEK SMALL LETTER OMEGA - 0x03CA: 0xFA, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03CB: 0xFB, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03CC: 0xFC, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03CD: 0xFD, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03CE: 0xFE, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2015: 0xAF, # HORIZONTAL BAR - 0x2018: 0xA1, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xA2, # RIGHT SINGLE QUOTATION MARK - 0x20AC: 0xA4, # EURO SIGN - 0x20AF: 0xA5, # DRACHMA SIGN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF 
TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, 
# LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 
0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B7: 0xB7, # MIDDLE DOT + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x037A: 0xAA, # GREEK YPOGEGRAMMENI + 0x0384: 0xB4, # GREEK TONOS + 0x0385: 0xB5, # GREEK DIALYTIKA TONOS + 0x0386: 0xB6, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0xB8, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0xB9, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038A: 0xBA, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038C: 0xBC, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038E: 0xBE, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038F: 0xBF, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0xC0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0xC1, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0xC2, # GREEK CAPITAL LETTER BETA + 0x0393: 0xC3, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0xC4, # GREEK CAPITAL LETTER DELTA + 0x0395: 0xC5, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0xC6, # GREEK CAPITAL LETTER ZETA + 0x0397: 0xC7, # GREEK CAPITAL LETTER ETA + 0x0398: 0xC8, # GREEK CAPITAL LETTER THETA + 0x0399: 0xC9, # GREEK CAPITAL LETTER IOTA + 0x039A: 0xCA, # GREEK CAPITAL LETTER KAPPA + 0x039B: 0xCB, # GREEK CAPITAL LETTER LAMDA + 0x039C: 0xCC, # GREEK CAPITAL LETTER MU + 0x039D: 0xCD, # GREEK CAPITAL LETTER NU + 0x039E: 0xCE, # 
GREEK CAPITAL LETTER XI + 0x039F: 0xCF, # GREEK CAPITAL LETTER OMICRON + 0x03A0: 0xD0, # GREEK CAPITAL LETTER PI + 0x03A1: 0xD1, # GREEK CAPITAL LETTER RHO + 0x03A3: 0xD3, # GREEK CAPITAL LETTER SIGMA + 0x03A4: 0xD4, # GREEK CAPITAL LETTER TAU + 0x03A5: 0xD5, # GREEK CAPITAL LETTER UPSILON + 0x03A6: 0xD6, # GREEK CAPITAL LETTER PHI + 0x03A7: 0xD7, # GREEK CAPITAL LETTER CHI + 0x03A8: 0xD8, # GREEK CAPITAL LETTER PSI + 0x03A9: 0xD9, # GREEK CAPITAL LETTER OMEGA + 0x03AA: 0xDA, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03AB: 0xDB, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03AC: 0xDC, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03AD: 0xDD, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03AE: 0xDE, # GREEK SMALL LETTER ETA WITH TONOS + 0x03AF: 0xDF, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03B0: 0xE0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA + 0x03B2: 0xE2, # GREEK SMALL LETTER BETA + 0x03B3: 0xE3, # GREEK SMALL LETTER GAMMA + 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA + 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON + 0x03B6: 0xE6, # GREEK SMALL LETTER ZETA + 0x03B7: 0xE7, # GREEK SMALL LETTER ETA + 0x03B8: 0xE8, # GREEK SMALL LETTER THETA + 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA + 0x03BA: 0xEA, # GREEK SMALL LETTER KAPPA + 0x03BB: 0xEB, # GREEK SMALL LETTER LAMDA + 0x03BC: 0xEC, # GREEK SMALL LETTER MU + 0x03BD: 0xED, # GREEK SMALL LETTER NU + 0x03BE: 0xEE, # GREEK SMALL LETTER XI + 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON + 0x03C0: 0xF0, # GREEK SMALL LETTER PI + 0x03C1: 0xF1, # GREEK SMALL LETTER RHO + 0x03C2: 0xF2, # GREEK SMALL LETTER FINAL SIGMA + 0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA + 0x03C4: 0xF4, # GREEK SMALL LETTER TAU + 0x03C5: 0xF5, # GREEK SMALL LETTER UPSILON + 0x03C6: 0xF6, # GREEK SMALL LETTER PHI + 0x03C7: 0xF7, # GREEK SMALL LETTER CHI + 0x03C8: 0xF8, # GREEK SMALL LETTER PSI + 0x03C9: 0xF9, # GREEK SMALL LETTER OMEGA + 0x03CA: 0xFA, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03CB: 
0xFB, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03CC: 0xFC, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03CD: 0xFD, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03CE: 0xFE, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0xAF, # HORIZONTAL BAR + 0x2018: 0xA1, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xA2, # RIGHT SINGLE QUOTATION MARK + 0x20AC: 0xA4, # EURO SIGN + 0x20AF: 0xA5, # DRACHMA SIGN } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_8.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_8.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_8.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,197 +32,197 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F 
-> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL 
LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - 
u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\ufffe' - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xd7' # 0xAA -> MULTIPLICATION SIGN - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xf7' # 0xBA -> DIVISION SIGN - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + 
u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\ufffe' + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xd7' # 0xAA -> MULTIPLICATION SIGN + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xf7' # 0xBA -> DIVISION SIGN + u'\xbb' # 0xBB -> 
RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS u'\ufffe' u'\ufffe' u'\ufffe' @@ -255,263 +255,262 @@ u'\ufffe' u'\ufffe' u'\ufffe' - u'\u2017' # 0xDF -> DOUBLE LOW LINE - u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF - u'\u05d1' # 0xE1 -> HEBREW LETTER BET - u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL - u'\u05d3' # 0xE3 -> HEBREW LETTER DALET - u'\u05d4' # 0xE4 -> HEBREW LETTER HE - u'\u05d5' # 0xE5 -> HEBREW LETTER VAV - u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0xE7 -> HEBREW LETTER HET - u'\u05d8' # 0xE8 -> HEBREW LETTER TET - u'\u05d9' # 0xE9 -> HEBREW LETTER YOD - u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF - u'\u05db' # 0xEB -> HEBREW LETTER KAF - u'\u05dc' # 0xEC -> HEBREW LETTER LAMED - u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM - u'\u05de' # 0xEE -> HEBREW LETTER MEM - u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0xF0 -> HEBREW LETTER NUN - u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN - u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0xF4 -> HEBREW LETTER PE - u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI - u'\u05e7' # 0xF7 -> HEBREW LETTER QOF - u'\u05e8' # 0xF8 -> HEBREW LETTER RESH - u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN - u'\u05ea' # 0xFA -> HEBREW LETTER TAV + u'\u2017' # 0xDF -> DOUBLE LOW LINE + u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF + u'\u05d1' # 0xE1 -> HEBREW LETTER BET + u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL + u'\u05d3' # 0xE3 -> HEBREW LETTER DALET + u'\u05d4' # 0xE4 -> HEBREW LETTER HE + u'\u05d5' # 0xE5 -> HEBREW LETTER VAV + u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0xE7 -> HEBREW LETTER HET + u'\u05d8' # 0xE8 -> HEBREW LETTER TET + u'\u05d9' # 0xE9 -> HEBREW LETTER YOD + u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF + u'\u05db' # 0xEB -> HEBREW LETTER KAF + u'\u05dc' # 0xEC -> HEBREW 
LETTER LAMED + u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM + u'\u05de' # 0xEE -> HEBREW LETTER MEM + u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0xF0 -> HEBREW LETTER NUN + u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN + u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0xF4 -> HEBREW LETTER PE + u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI + u'\u05e7' # 0xF7 -> HEBREW LETTER QOF + u'\u05e8' # 0xF8 -> HEBREW LETTER RESH + u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN + u'\u05ea' # 0xFA -> HEBREW LETTER TAV u'\ufffe' u'\ufffe' - u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK + u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK + u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK u'\ufffe' ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 
0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 
0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN 
- 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00D7: 0xAA, # MULTIPLICATION SIGN - 0x00F7: 0xBA, # DIVISION SIGN - 0x05D0: 0xE0, # HEBREW LETTER ALEF - 0x05D1: 0xE1, # HEBREW LETTER BET - 0x05D2: 0xE2, # HEBREW LETTER GIMEL - 0x05D3: 0xE3, # HEBREW LETTER DALET - 0x05D4: 0xE4, # HEBREW LETTER HE - 0x05D5: 0xE5, # HEBREW LETTER VAV - 0x05D6: 0xE6, # HEBREW LETTER ZAYIN - 0x05D7: 0xE7, # HEBREW LETTER HET - 0x05D8: 0xE8, # HEBREW LETTER TET - 0x05D9: 0xE9, # HEBREW LETTER YOD - 0x05DA: 0xEA, # HEBREW LETTER FINAL KAF - 0x05DB: 0xEB, # HEBREW LETTER KAF - 0x05DC: 0xEC, # HEBREW LETTER LAMED - 0x05DD: 0xED, # HEBREW LETTER FINAL MEM - 0x05DE: 0xEE, # HEBREW LETTER MEM - 0x05DF: 0xEF, # HEBREW LETTER FINAL NUN - 0x05E0: 0xF0, # HEBREW LETTER NUN - 0x05E1: 0xF1, # HEBREW LETTER SAMEKH - 0x05E2: 0xF2, # HEBREW LETTER AYIN - 0x05E3: 0xF3, # HEBREW LETTER FINAL PE - 0x05E4: 0xF4, # HEBREW LETTER PE - 0x05E5: 0xF5, # HEBREW LETTER FINAL TSADI - 0x05E6: 0xF6, # HEBREW LETTER TSADI - 0x05E7: 0xF7, # HEBREW LETTER QOF - 0x05E8: 0xF8, # HEBREW LETTER RESH - 0x05E9: 0xF9, # HEBREW LETTER SHIN - 0x05EA: 0xFA, # HEBREW LETTER TAV - 0x200E: 0xFD, # LEFT-TO-RIGHT MARK - 0x200F: 0xFE, # RIGHT-TO-LEFT MARK 
- 0x2017: 0xDF, # DOUBLE LOW LINE + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 
0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 
0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, 
# VULGAR FRACTION THREE QUARTERS + 0x00D7: 0xAA, # MULTIPLICATION SIGN + 0x00F7: 0xBA, # DIVISION SIGN + 0x05D0: 0xE0, # HEBREW LETTER ALEF + 0x05D1: 0xE1, # HEBREW LETTER BET + 0x05D2: 0xE2, # HEBREW LETTER GIMEL + 0x05D3: 0xE3, # HEBREW LETTER DALET + 0x05D4: 0xE4, # HEBREW LETTER HE + 0x05D5: 0xE5, # HEBREW LETTER VAV + 0x05D6: 0xE6, # HEBREW LETTER ZAYIN + 0x05D7: 0xE7, # HEBREW LETTER HET + 0x05D8: 0xE8, # HEBREW LETTER TET + 0x05D9: 0xE9, # HEBREW LETTER YOD + 0x05DA: 0xEA, # HEBREW LETTER FINAL KAF + 0x05DB: 0xEB, # HEBREW LETTER KAF + 0x05DC: 0xEC, # HEBREW LETTER LAMED + 0x05DD: 0xED, # HEBREW LETTER FINAL MEM + 0x05DE: 0xEE, # HEBREW LETTER MEM + 0x05DF: 0xEF, # HEBREW LETTER FINAL NUN + 0x05E0: 0xF0, # HEBREW LETTER NUN + 0x05E1: 0xF1, # HEBREW LETTER SAMEKH + 0x05E2: 0xF2, # HEBREW LETTER AYIN + 0x05E3: 0xF3, # HEBREW LETTER FINAL PE + 0x05E4: 0xF4, # HEBREW LETTER PE + 0x05E5: 0xF5, # HEBREW LETTER FINAL TSADI + 0x05E6: 0xF6, # HEBREW LETTER TSADI + 0x05E7: 0xF7, # HEBREW LETTER QOF + 0x05E8: 0xF8, # HEBREW LETTER RESH + 0x05E9: 0xF9, # HEBREW LETTER SHIN + 0x05EA: 0xFA, # HEBREW LETTER TAV + 0x200E: 0xFD, # LEFT-TO-RIGHT MARK + 0x200F: 0xFE, # RIGHT-TO-LEFT MARK + 0x2017: 0xDF, # DOUBLE LOW LINE } - Modified: python/branches/ssize_t/Lib/encodings/iso8859_9.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/iso8859_9.py (original) +++ python/branches/ssize_t/Lib/encodings/iso8859_9.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> 
BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA 
-> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA - 
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + 
u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA 
-> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA + 
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 
0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL 
LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN 
SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER 
A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 
0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011E: 0xD0, # LATIN CAPITAL LETTER G WITH BREVE - 0x011F: 0xF0, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0xDD, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xFD, # LATIN SMALL LETTER DOTLESS I - 0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0xFE, # LATIN SMALL LETTER S WITH CEDILLA + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 
0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q 
+ 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 
0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, 
# LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER 
O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011E: 0xD0, # LATIN CAPITAL LETTER G WITH BREVE + 0x011F: 0xF0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0xDD, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0xFD, # LATIN SMALL LETTER DOTLESS I + 0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0xFE, # LATIN SMALL LETTER S WITH CEDILLA } - Modified: python/branches/ssize_t/Lib/encodings/koi8_r.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/koi8_r.py (original) +++ python/branches/ssize_t/Lib/encodings/koi8_r.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL 
FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL - u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u2580' # 0x8B -> UPPER HALF BLOCK - u'\u2584' # 0x8C -> LOWER HALF BLOCK - u'\u2588' # 0x8D -> FULL BLOCK - u'\u258c' # 0x8E -> LEFT HALF BLOCK - u'\u2590' # 0x8F -> RIGHT HALF BLOCK - u'\u2591' # 0x90 -> LIGHT SHADE - u'\u2592' # 0x91 -> MEDIUM SHADE - u'\u2593' # 0x92 -> DARK SHADE - u'\u2320' # 0x93 -> TOP HALF INTEGRAL - u'\u25a0' # 0x94 -> BLACK SQUARE - u'\u2219' # 0x95 -> BULLET OPERATOR - u'\u221a' # 0x96 -> SQUARE ROOT - u'\u2248' # 0x97 -> ALMOST EQUAL TO - u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO - u'\xa0' # 0x9A -> NO-BREAK SPACE - u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL - u'\xb0' # 0x9C -> DEGREE SIGN - u'\xb2' # 0x9D -> SUPERSCRIPT TWO - u'\xb7' # 0x9E -> MIDDLE DOT - u'\xf7' # 0x9F -> DIVISION SIGN - u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2552' # 0xA2 -> BOX DRAWINGS DOWN SINGLE AND 
RIGHT DOUBLE - u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO - u'\u2553' # 0xA4 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2555' # 0xA6 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2556' # 0xA7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u255c' # 0xAD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO - u'\u2562' # 0xB4 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2564' # 0xB6 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u256b' # 0xBD -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa9' # 0xBF -> COPYRIGHT SIGN - u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU - u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE - u'\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE - u'\u0434' 
# 0xC4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE - u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF - u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE - u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA - u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xCF -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE - u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA - u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U - u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE - u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE - u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU - u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE - u'\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA - u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E - u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA - u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE - u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN - u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU - u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE - u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE - u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE - u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF - u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE - u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA - u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xED -> 
CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE - u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA - u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U - u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE - u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU - u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE - u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA - u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E - u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA - u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE - u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' 
# 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN 
CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL + u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u2580' # 
0x8B -> UPPER HALF BLOCK + u'\u2584' # 0x8C -> LOWER HALF BLOCK + u'\u2588' # 0x8D -> FULL BLOCK + u'\u258c' # 0x8E -> LEFT HALF BLOCK + u'\u2590' # 0x8F -> RIGHT HALF BLOCK + u'\u2591' # 0x90 -> LIGHT SHADE + u'\u2592' # 0x91 -> MEDIUM SHADE + u'\u2593' # 0x92 -> DARK SHADE + u'\u2320' # 0x93 -> TOP HALF INTEGRAL + u'\u25a0' # 0x94 -> BLACK SQUARE + u'\u2219' # 0x95 -> BULLET OPERATOR + u'\u221a' # 0x96 -> SQUARE ROOT + u'\u2248' # 0x97 -> ALMOST EQUAL TO + u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO + u'\xa0' # 0x9A -> NO-BREAK SPACE + u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL + u'\xb0' # 0x9C -> DEGREE SIGN + u'\xb2' # 0x9D -> SUPERSCRIPT TWO + u'\xb7' # 0x9E -> MIDDLE DOT + u'\xf7' # 0x9F -> DIVISION SIGN + u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2552' # 0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO + u'\u2553' # 0xA4 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2555' # 0xA6 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2556' # 0xA7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u255c' # 0xAD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO + u'\u2562' # 0xB4 -> BOX DRAWINGS VERTICAL DOUBLE 
AND LEFT SINGLE + u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2564' # 0xB6 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u256b' # 0xBD -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa9' # 0xBF -> COPYRIGHT SIGN + u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU + u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE + u'\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE + u'\u0434' # 0xC4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE + u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF + u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE + u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA + u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xCF -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE + u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA + u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U + u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE + u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE + u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU + u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE 
+ u'\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA + u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E + u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA + u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE + u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU + u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE + u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE + u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE + u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF + u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE + u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA + u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xED -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE + u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA + u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U + u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE + u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU + u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE + u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA + u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E + u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE + u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 
0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # 
LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 
0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0x9A, # NO-BREAK SPACE - 0x00A9: 0xBF, # COPYRIGHT SIGN - 0x00B0: 0x9C, # DEGREE SIGN - 0x00B2: 0x9D, # SUPERSCRIPT TWO - 0x00B7: 0x9E, # MIDDLE DOT - 0x00F7: 0x9F, # DIVISION SIGN - 0x0401: 0xB3, # CYRILLIC CAPITAL LETTER IO - 0x0410: 0xE1, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xE2, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0xF7, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xE7, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xE4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xE5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xF6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xFA, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xE9, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xEA, # CYRILLIC CAPITAL LETTER SHORT I - 0x041A: 0xEB, # CYRILLIC CAPITAL LETTER KA - 0x041B: 0xEC, # CYRILLIC CAPITAL LETTER EL - 0x041C: 0xED, # CYRILLIC CAPITAL LETTER EM - 0x041D: 0xEE, # CYRILLIC CAPITAL LETTER EN - 0x041E: 0xEF, # CYRILLIC CAPITAL LETTER O - 0x041F: 0xF0, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xF2, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xF3, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xF4, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xF5, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xE6, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xE8, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xE3, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xFE, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xFB, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xFD, # CYRILLIC CAPITAL LETTER SHCHA - 0x042A: 0xFF, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042B: 0xF9, # CYRILLIC CAPITAL LETTER YERU - 0x042C: 0xF8, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042D: 0xFC, # CYRILLIC CAPITAL LETTER E - 0x042E: 0xE0, # CYRILLIC CAPITAL LETTER YU - 
0x042F: 0xF1, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xC1, # CYRILLIC SMALL LETTER A - 0x0431: 0xC2, # CYRILLIC SMALL LETTER BE - 0x0432: 0xD7, # CYRILLIC SMALL LETTER VE - 0x0433: 0xC7, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xC4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xC5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xDA, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xC9, # CYRILLIC SMALL LETTER I - 0x0439: 0xCA, # CYRILLIC SMALL LETTER SHORT I - 0x043A: 0xCB, # CYRILLIC SMALL LETTER KA - 0x043B: 0xCC, # CYRILLIC SMALL LETTER EL - 0x043C: 0xCD, # CYRILLIC SMALL LETTER EM - 0x043D: 0xCE, # CYRILLIC SMALL LETTER EN - 0x043E: 0xCF, # CYRILLIC SMALL LETTER O - 0x043F: 0xD0, # CYRILLIC SMALL LETTER PE - 0x0440: 0xD2, # CYRILLIC SMALL LETTER ER - 0x0441: 0xD3, # CYRILLIC SMALL LETTER ES - 0x0442: 0xD4, # CYRILLIC SMALL LETTER TE - 0x0443: 0xD5, # CYRILLIC SMALL LETTER U - 0x0444: 0xC6, # CYRILLIC SMALL LETTER EF - 0x0445: 0xC8, # CYRILLIC SMALL LETTER HA - 0x0446: 0xC3, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xDE, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xDB, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xDD, # CYRILLIC SMALL LETTER SHCHA - 0x044A: 0xDF, # CYRILLIC SMALL LETTER HARD SIGN - 0x044B: 0xD9, # CYRILLIC SMALL LETTER YERU - 0x044C: 0xD8, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044D: 0xDC, # CYRILLIC SMALL LETTER E - 0x044E: 0xC0, # CYRILLIC SMALL LETTER YU - 0x044F: 0xD1, # CYRILLIC SMALL LETTER YA - 0x0451: 0xA3, # CYRILLIC SMALL LETTER IO - 0x2219: 0x95, # BULLET OPERATOR - 0x221A: 0x96, # SQUARE ROOT - 0x2248: 0x97, # ALMOST EQUAL TO - 0x2264: 0x98, # LESS-THAN OR EQUAL TO - 0x2265: 0x99, # GREATER-THAN OR EQUAL TO - 0x2320: 0x93, # TOP HALF INTEGRAL - 0x2321: 0x9B, # BOTTOM HALF INTEGRAL - 0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL - 0x250C: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 
0x85, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251C: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252C: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253C: 0x8A, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0xA0, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0xA1, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0xA2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0xA4, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0xA5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0xA6, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0xA7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0xA8, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0xA9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0xAA, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255A: 0xAB, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255B: 0xAC, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255C: 0xAD, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255D: 0xAE, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255E: 0xAF, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255F: 0xB0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0xB1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0xB2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0xB4, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0xB5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0xB6, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0xB7, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0xB8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0xB9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0xBA, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0xBB, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256A: 0xBC, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256B: 0xBD, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256C: 0xBE, # BOX DRAWINGS 
DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x8B, # UPPER HALF BLOCK - 0x2584: 0x8C, # LOWER HALF BLOCK - 0x2588: 0x8D, # FULL BLOCK - 0x258C: 0x8E, # LEFT HALF BLOCK - 0x2590: 0x8F, # RIGHT HALF BLOCK - 0x2591: 0x90, # LIGHT SHADE - 0x2592: 0x91, # MEDIUM SHADE - 0x2593: 0x92, # DARK SHADE - 0x25A0: 0x94, # BLACK SQUARE + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # 
DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL 
LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0x9A, # NO-BREAK SPACE + 0x00A9: 0xBF, # COPYRIGHT SIGN + 0x00B0: 0x9C, # DEGREE SIGN + 0x00B2: 0x9D, # SUPERSCRIPT TWO + 0x00B7: 0x9E, # MIDDLE DOT + 0x00F7: 0x9F, # DIVISION SIGN + 0x0401: 0xB3, # CYRILLIC CAPITAL LETTER IO + 0x0410: 0xE1, # CYRILLIC CAPITAL LETTER A + 0x0411: 0xE2, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0xF7, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0xE7, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0xE4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0xE5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0xF6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0xFA, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0xE9, # CYRILLIC CAPITAL LETTER I + 0x0419: 0xEA, # CYRILLIC CAPITAL LETTER SHORT I + 0x041A: 0xEB, # CYRILLIC CAPITAL LETTER KA + 0x041B: 0xEC, # CYRILLIC CAPITAL LETTER EL + 0x041C: 0xED, # CYRILLIC CAPITAL LETTER EM + 0x041D: 0xEE, # CYRILLIC CAPITAL LETTER EN + 0x041E: 0xEF, # CYRILLIC CAPITAL LETTER O + 0x041F: 0xF0, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0xF2, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0xF3, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0xF4, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0xF5, # CYRILLIC CAPITAL LETTER U + 0x0424: 0xE6, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0xE8, # CYRILLIC 
CAPITAL LETTER HA + 0x0426: 0xE3, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0xFE, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0xFB, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0xFD, # CYRILLIC CAPITAL LETTER SHCHA + 0x042A: 0xFF, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042B: 0xF9, # CYRILLIC CAPITAL LETTER YERU + 0x042C: 0xF8, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042D: 0xFC, # CYRILLIC CAPITAL LETTER E + 0x042E: 0xE0, # CYRILLIC CAPITAL LETTER YU + 0x042F: 0xF1, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0xC1, # CYRILLIC SMALL LETTER A + 0x0431: 0xC2, # CYRILLIC SMALL LETTER BE + 0x0432: 0xD7, # CYRILLIC SMALL LETTER VE + 0x0433: 0xC7, # CYRILLIC SMALL LETTER GHE + 0x0434: 0xC4, # CYRILLIC SMALL LETTER DE + 0x0435: 0xC5, # CYRILLIC SMALL LETTER IE + 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0xDA, # CYRILLIC SMALL LETTER ZE + 0x0438: 0xC9, # CYRILLIC SMALL LETTER I + 0x0439: 0xCA, # CYRILLIC SMALL LETTER SHORT I + 0x043A: 0xCB, # CYRILLIC SMALL LETTER KA + 0x043B: 0xCC, # CYRILLIC SMALL LETTER EL + 0x043C: 0xCD, # CYRILLIC SMALL LETTER EM + 0x043D: 0xCE, # CYRILLIC SMALL LETTER EN + 0x043E: 0xCF, # CYRILLIC SMALL LETTER O + 0x043F: 0xD0, # CYRILLIC SMALL LETTER PE + 0x0440: 0xD2, # CYRILLIC SMALL LETTER ER + 0x0441: 0xD3, # CYRILLIC SMALL LETTER ES + 0x0442: 0xD4, # CYRILLIC SMALL LETTER TE + 0x0443: 0xD5, # CYRILLIC SMALL LETTER U + 0x0444: 0xC6, # CYRILLIC SMALL LETTER EF + 0x0445: 0xC8, # CYRILLIC SMALL LETTER HA + 0x0446: 0xC3, # CYRILLIC SMALL LETTER TSE + 0x0447: 0xDE, # CYRILLIC SMALL LETTER CHE + 0x0448: 0xDB, # CYRILLIC SMALL LETTER SHA + 0x0449: 0xDD, # CYRILLIC SMALL LETTER SHCHA + 0x044A: 0xDF, # CYRILLIC SMALL LETTER HARD SIGN + 0x044B: 0xD9, # CYRILLIC SMALL LETTER YERU + 0x044C: 0xD8, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044D: 0xDC, # CYRILLIC SMALL LETTER E + 0x044E: 0xC0, # CYRILLIC SMALL LETTER YU + 0x044F: 0xD1, # CYRILLIC SMALL LETTER YA + 0x0451: 0xA3, # CYRILLIC SMALL LETTER IO + 0x2219: 0x95, # BULLET OPERATOR + 0x221A: 0x96, # SQUARE ROOT + 
0x2248: 0x97, # ALMOST EQUAL TO + 0x2264: 0x98, # LESS-THAN OR EQUAL TO + 0x2265: 0x99, # GREATER-THAN OR EQUAL TO + 0x2320: 0x93, # TOP HALF INTEGRAL + 0x2321: 0x9B, # BOTTOM HALF INTEGRAL + 0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL + 0x250C: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251C: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252C: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253C: 0x8A, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0xA0, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0xA1, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0xA2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0xA4, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0xA5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0xA6, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0xA7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0xA8, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0xA9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0xAA, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255A: 0xAB, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255B: 0xAC, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255C: 0xAD, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255D: 0xAE, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255E: 0xAF, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255F: 0xB0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0xB1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0xB2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0xB4, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0xB5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0xB6, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0xB7, # BOX 
DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0xB8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0xB9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0xBA, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0xBB, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256A: 0xBC, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256B: 0xBD, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256C: 0xBE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x8B, # UPPER HALF BLOCK + 0x2584: 0x8C, # LOWER HALF BLOCK + 0x2588: 0x8D, # FULL BLOCK + 0x258C: 0x8E, # LEFT HALF BLOCK + 0x2590: 0x8F, # RIGHT HALF BLOCK + 0x2591: 0x90, # LIGHT SHADE + 0x2592: 0x91, # MEDIUM SHADE + 0x2593: 0x92, # DARK SHADE + 0x25A0: 0x94, # BLACK SQUARE } - Modified: python/branches/ssize_t/Lib/encodings/koi8_u.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/koi8_u.py (original) +++ python/branches/ssize_t/Lib/encodings/koi8_u.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - 
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL - u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u2580' # 0x8B -> UPPER HALF BLOCK - u'\u2584' # 0x8C -> LOWER HALF BLOCK - u'\u2588' # 0x8D -> FULL BLOCK - u'\u258c' # 0x8E -> LEFT HALF BLOCK - u'\u2590' # 0x8F -> RIGHT HALF BLOCK - u'\u2591' # 0x90 -> LIGHT SHADE - u'\u2592' # 0x91 -> MEDIUM SHADE - u'\u2593' # 0x92 -> DARK SHADE - u'\u2320' # 0x93 -> TOP HALF INTEGRAL - u'\u25a0' # 0x94 -> BLACK SQUARE - u'\u2219' # 0x95 -> BULLET OPERATOR - u'\u221a' # 0x96 -> SQUARE ROOT - u'\u2248' # 0x97 -> ALMOST EQUAL TO - u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO - u'\xa0' # 0x9A -> NO-BREAK SPACE - u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL - u'\xb0' # 0x9C -> DEGREE SIGN - u'\xb2' # 0x9D -> SUPERSCRIPT TWO - u'\xb7' # 0x9E -> MIDDLE DOT - u'\xf7' # 0x9F -> DIVISION SIGN - u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2552' # 0xA2 -> BOX DRAWINGS DOWN SINGLE AND 
RIGHT DOUBLE - u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO - u'\u0454' # 0xA4 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u0456' # 0xA6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0457' # 0xA7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN) - u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u0491' # 0xAD -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN - u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO - u'\u0404' # 0xB4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u0406' # 0xB6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0407' # 0xB7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN) - u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u0490' # 0xBD -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN - u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa9' # 0xBF -> COPYRIGHT SIGN - u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU - u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE - u'\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE - u'\u0434' # 0xC4 -> 
CYRILLIC SMALL LETTER DE - u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE - u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF - u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE - u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA - u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xCF -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE - u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA - u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U - u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE - u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE - u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU - u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE - u'\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA - u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E - u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA - u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE - u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN - u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU - u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE - u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE - u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE - u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF - u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE - u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA - u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xED -> CYRILLIC CAPITAL 
LETTER EM - u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE - u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA - u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U - u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE - u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU - u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE - u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA - u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E - u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA - u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE - u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT 
SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + 
u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL + u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u2580' # 0x8B -> UPPER HALF 
BLOCK + u'\u2584' # 0x8C -> LOWER HALF BLOCK + u'\u2588' # 0x8D -> FULL BLOCK + u'\u258c' # 0x8E -> LEFT HALF BLOCK + u'\u2590' # 0x8F -> RIGHT HALF BLOCK + u'\u2591' # 0x90 -> LIGHT SHADE + u'\u2592' # 0x91 -> MEDIUM SHADE + u'\u2593' # 0x92 -> DARK SHADE + u'\u2320' # 0x93 -> TOP HALF INTEGRAL + u'\u25a0' # 0x94 -> BLACK SQUARE + u'\u2219' # 0x95 -> BULLET OPERATOR + u'\u221a' # 0x96 -> SQUARE ROOT + u'\u2248' # 0x97 -> ALMOST EQUAL TO + u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO + u'\xa0' # 0x9A -> NO-BREAK SPACE + u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL + u'\xb0' # 0x9C -> DEGREE SIGN + u'\xb2' # 0x9D -> SUPERSCRIPT TWO + u'\xb7' # 0x9E -> MIDDLE DOT + u'\xf7' # 0x9F -> DIVISION SIGN + u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2552' # 0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO + u'\u0454' # 0xA4 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u0456' # 0xA6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0xA7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN) + u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u0491' # 0xAD -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN + u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO + u'\u0404' # 0xB4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + 
u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u0406' # 0xB6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0407' # 0xB7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN) + u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u0490' # 0xBD -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN + u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa9' # 0xBF -> COPYRIGHT SIGN + u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU + u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE + u'\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE + u'\u0434' # 0xC4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE + u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF + u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE + u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA + u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xCF -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE + u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA + u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U + u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE + u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE + u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU + u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE + u'\u0448' # 0xDB -> 
CYRILLIC SMALL LETTER SHA + u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E + u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA + u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE + u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU + u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE + u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE + u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE + u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF + u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE + u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA + u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xED -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE + u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA + u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U + u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE + u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU + u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE + u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA + u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E + u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE + u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF 
TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 
0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL 
LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0x9A, # NO-BREAK SPACE - 0x00A9: 0xBF, # COPYRIGHT SIGN - 0x00B0: 0x9C, # DEGREE SIGN - 0x00B2: 0x9D, # SUPERSCRIPT TWO - 0x00B7: 0x9E, # MIDDLE DOT - 0x00F7: 0x9F, # DIVISION SIGN - 0x0401: 0xB3, # CYRILLIC CAPITAL LETTER IO - 0x0404: 0xB4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0406: 0xB6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xB7, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN) - 0x0410: 0xE1, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xE2, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0xF7, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xE7, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xE4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xE5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xF6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xFA, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xE9, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xEA, # CYRILLIC CAPITAL LETTER SHORT I - 0x041A: 0xEB, # CYRILLIC CAPITAL LETTER KA - 0x041B: 0xEC, # CYRILLIC CAPITAL LETTER EL - 0x041C: 0xED, # CYRILLIC CAPITAL LETTER EM - 0x041D: 0xEE, # CYRILLIC CAPITAL LETTER EN - 0x041E: 0xEF, # CYRILLIC CAPITAL LETTER O - 0x041F: 0xF0, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xF2, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xF3, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xF4, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xF5, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xE6, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xE8, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xE3, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xFE, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xFB, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xFD, # CYRILLIC CAPITAL LETTER SHCHA - 0x042A: 0xFF, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042B: 0xF9, # CYRILLIC 
CAPITAL LETTER YERU - 0x042C: 0xF8, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042D: 0xFC, # CYRILLIC CAPITAL LETTER E - 0x042E: 0xE0, # CYRILLIC CAPITAL LETTER YU - 0x042F: 0xF1, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xC1, # CYRILLIC SMALL LETTER A - 0x0431: 0xC2, # CYRILLIC SMALL LETTER BE - 0x0432: 0xD7, # CYRILLIC SMALL LETTER VE - 0x0433: 0xC7, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xC4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xC5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xDA, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xC9, # CYRILLIC SMALL LETTER I - 0x0439: 0xCA, # CYRILLIC SMALL LETTER SHORT I - 0x043A: 0xCB, # CYRILLIC SMALL LETTER KA - 0x043B: 0xCC, # CYRILLIC SMALL LETTER EL - 0x043C: 0xCD, # CYRILLIC SMALL LETTER EM - 0x043D: 0xCE, # CYRILLIC SMALL LETTER EN - 0x043E: 0xCF, # CYRILLIC SMALL LETTER O - 0x043F: 0xD0, # CYRILLIC SMALL LETTER PE - 0x0440: 0xD2, # CYRILLIC SMALL LETTER ER - 0x0441: 0xD3, # CYRILLIC SMALL LETTER ES - 0x0442: 0xD4, # CYRILLIC SMALL LETTER TE - 0x0443: 0xD5, # CYRILLIC SMALL LETTER U - 0x0444: 0xC6, # CYRILLIC SMALL LETTER EF - 0x0445: 0xC8, # CYRILLIC SMALL LETTER HA - 0x0446: 0xC3, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xDE, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xDB, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xDD, # CYRILLIC SMALL LETTER SHCHA - 0x044A: 0xDF, # CYRILLIC SMALL LETTER HARD SIGN - 0x044B: 0xD9, # CYRILLIC SMALL LETTER YERU - 0x044C: 0xD8, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044D: 0xDC, # CYRILLIC SMALL LETTER E - 0x044E: 0xC0, # CYRILLIC SMALL LETTER YU - 0x044F: 0xD1, # CYRILLIC SMALL LETTER YA - 0x0451: 0xA3, # CYRILLIC SMALL LETTER IO - 0x0454: 0xA4, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0456: 0xA6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xA7, # CYRILLIC SMALL LETTER YI (UKRAINIAN) - 0x0490: 0xBD, # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN - 0x0491: 0xAD, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN - 0x2219: 0x95, # BULLET OPERATOR - 0x221A: 
0x96, # SQUARE ROOT - 0x2248: 0x97, # ALMOST EQUAL TO - 0x2264: 0x98, # LESS-THAN OR EQUAL TO - 0x2265: 0x99, # GREATER-THAN OR EQUAL TO - 0x2320: 0x93, # TOP HALF INTEGRAL - 0x2321: 0x9B, # BOTTOM HALF INTEGRAL - 0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL - 0x250C: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251C: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252C: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253C: 0x8A, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0xA0, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0xA1, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0xA2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2554: 0xA5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0xA8, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0xA9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0xAA, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255A: 0xAB, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255B: 0xAC, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255D: 0xAE, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255E: 0xAF, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255F: 0xB0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0xB1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0xB2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2563: 0xB5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0xB8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0xB9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0xBA, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0xBB, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256A: 0xBC, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256C: 0xBE, # BOX DRAWINGS DOUBLE VERTICAL AND 
HORIZONTAL - 0x2580: 0x8B, # UPPER HALF BLOCK - 0x2584: 0x8C, # LOWER HALF BLOCK - 0x2588: 0x8D, # FULL BLOCK - 0x258C: 0x8E, # LEFT HALF BLOCK - 0x2590: 0x8F, # RIGHT HALF BLOCK - 0x2591: 0x90, # LIGHT SHADE - 0x2592: 0x91, # MEDIUM SHADE - 0x2593: 0x92, # DARK SHADE - 0x25A0: 0x94, # BLACK SQUARE + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 
0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, 
# LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0x9A, # NO-BREAK SPACE + 0x00A9: 0xBF, # COPYRIGHT SIGN + 0x00B0: 0x9C, # DEGREE SIGN + 0x00B2: 0x9D, # SUPERSCRIPT TWO + 0x00B7: 0x9E, # MIDDLE DOT + 0x00F7: 0x9F, # DIVISION SIGN + 0x0401: 0xB3, # CYRILLIC CAPITAL LETTER IO + 0x0404: 0xB4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0406: 0xB6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0xB7, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN) + 0x0410: 0xE1, # CYRILLIC CAPITAL LETTER A + 0x0411: 0xE2, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0xF7, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0xE7, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0xE4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0xE5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0xF6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0xFA, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0xE9, # CYRILLIC CAPITAL LETTER I + 0x0419: 0xEA, # CYRILLIC CAPITAL LETTER SHORT I + 0x041A: 0xEB, # CYRILLIC CAPITAL LETTER KA + 0x041B: 0xEC, # CYRILLIC CAPITAL LETTER EL + 0x041C: 0xED, # CYRILLIC CAPITAL LETTER EM + 0x041D: 0xEE, # CYRILLIC CAPITAL LETTER EN + 0x041E: 0xEF, # CYRILLIC CAPITAL LETTER O + 0x041F: 0xF0, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0xF2, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0xF3, # CYRILLIC CAPITAL LETTER ES + 
0x0422: 0xF4, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0xF5, # CYRILLIC CAPITAL LETTER U + 0x0424: 0xE6, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0xE8, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0xE3, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0xFE, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0xFB, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0xFD, # CYRILLIC CAPITAL LETTER SHCHA + 0x042A: 0xFF, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042B: 0xF9, # CYRILLIC CAPITAL LETTER YERU + 0x042C: 0xF8, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042D: 0xFC, # CYRILLIC CAPITAL LETTER E + 0x042E: 0xE0, # CYRILLIC CAPITAL LETTER YU + 0x042F: 0xF1, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0xC1, # CYRILLIC SMALL LETTER A + 0x0431: 0xC2, # CYRILLIC SMALL LETTER BE + 0x0432: 0xD7, # CYRILLIC SMALL LETTER VE + 0x0433: 0xC7, # CYRILLIC SMALL LETTER GHE + 0x0434: 0xC4, # CYRILLIC SMALL LETTER DE + 0x0435: 0xC5, # CYRILLIC SMALL LETTER IE + 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0xDA, # CYRILLIC SMALL LETTER ZE + 0x0438: 0xC9, # CYRILLIC SMALL LETTER I + 0x0439: 0xCA, # CYRILLIC SMALL LETTER SHORT I + 0x043A: 0xCB, # CYRILLIC SMALL LETTER KA + 0x043B: 0xCC, # CYRILLIC SMALL LETTER EL + 0x043C: 0xCD, # CYRILLIC SMALL LETTER EM + 0x043D: 0xCE, # CYRILLIC SMALL LETTER EN + 0x043E: 0xCF, # CYRILLIC SMALL LETTER O + 0x043F: 0xD0, # CYRILLIC SMALL LETTER PE + 0x0440: 0xD2, # CYRILLIC SMALL LETTER ER + 0x0441: 0xD3, # CYRILLIC SMALL LETTER ES + 0x0442: 0xD4, # CYRILLIC SMALL LETTER TE + 0x0443: 0xD5, # CYRILLIC SMALL LETTER U + 0x0444: 0xC6, # CYRILLIC SMALL LETTER EF + 0x0445: 0xC8, # CYRILLIC SMALL LETTER HA + 0x0446: 0xC3, # CYRILLIC SMALL LETTER TSE + 0x0447: 0xDE, # CYRILLIC SMALL LETTER CHE + 0x0448: 0xDB, # CYRILLIC SMALL LETTER SHA + 0x0449: 0xDD, # CYRILLIC SMALL LETTER SHCHA + 0x044A: 0xDF, # CYRILLIC SMALL LETTER HARD SIGN + 0x044B: 0xD9, # CYRILLIC SMALL LETTER YERU + 0x044C: 0xD8, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044D: 0xDC, # CYRILLIC SMALL LETTER E + 0x044E: 0xC0, # CYRILLIC SMALL 
LETTER YU + 0x044F: 0xD1, # CYRILLIC SMALL LETTER YA + 0x0451: 0xA3, # CYRILLIC SMALL LETTER IO + 0x0454: 0xA4, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0456: 0xA6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0xA7, # CYRILLIC SMALL LETTER YI (UKRAINIAN) + 0x0490: 0xBD, # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN + 0x0491: 0xAD, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN + 0x2219: 0x95, # BULLET OPERATOR + 0x221A: 0x96, # SQUARE ROOT + 0x2248: 0x97, # ALMOST EQUAL TO + 0x2264: 0x98, # LESS-THAN OR EQUAL TO + 0x2265: 0x99, # GREATER-THAN OR EQUAL TO + 0x2320: 0x93, # TOP HALF INTEGRAL + 0x2321: 0x9B, # BOTTOM HALF INTEGRAL + 0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL + 0x250C: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251C: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252C: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253C: 0x8A, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0xA0, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0xA1, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0xA2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2554: 0xA5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0xA8, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0xA9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0xAA, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255A: 0xAB, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255B: 0xAC, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255D: 0xAE, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255E: 0xAF, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255F: 0xB0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0xB1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0xB2, # BOX DRAWINGS VERTICAL 
SINGLE AND LEFT DOUBLE + 0x2563: 0xB5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0xB8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0xB9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0xBA, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0xBB, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256A: 0xBC, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256C: 0xBE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x8B, # UPPER HALF BLOCK + 0x2584: 0x8C, # LOWER HALF BLOCK + 0x2588: 0x8D, # FULL BLOCK + 0x258C: 0x8E, # LEFT HALF BLOCK + 0x2590: 0x8F, # RIGHT HALF BLOCK + 0x2591: 0x90, # LIGHT SHADE + 0x2592: 0x91, # MEDIUM SHADE + 0x2593: 0x92, # DARK SHADE + 0x25A0: 0x94, # BLACK SQUARE } - Modified: python/branches/ssize_t/Lib/encodings/mac_arabic.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/mac_arabic.py (original) +++ python/branches/ssize_t/Lib/encodings/mac_arabic.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,654 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00a0, # NO-BREAK SPACE, right-left - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x06ba, # ARABIC LETTER NOON GHUNNA - 0x008c: 0x00ab, # LEFT-POINTING DOUBLE ANGLE 
QUOTATION MARK, right-left - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x2026, # HORIZONTAL ELLIPSIS, right-left - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f7, # DIVISION SIGN, right-left - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x0020, # SPACE, right-left - 0x00a1: 0x0021, # EXCLAMATION MARK, right-left - 0x00a2: 0x0022, # QUOTATION MARK, right-left - 0x00a3: 0x0023, # NUMBER SIGN, right-left - 0x00a4: 0x0024, # DOLLAR SIGN, right-left - 0x00a5: 0x066a, # ARABIC PERCENT SIGN - 0x00a6: 0x0026, # AMPERSAND, right-left - 0x00a7: 0x0027, # APOSTROPHE, right-left - 0x00a8: 0x0028, # LEFT PARENTHESIS, right-left - 0x00a9: 0x0029, # RIGHT PARENTHESIS, right-left - 0x00aa: 0x002a, # ASTERISK, right-left - 0x00ab: 0x002b, # PLUS SIGN, right-left - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ad: 0x002d, # HYPHEN-MINUS, right-left - 0x00ae: 0x002e, # FULL STOP, right-left - 0x00af: 0x002f, # SOLIDUS, right-left - 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO, right-left (need override) - 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE, right-left (need override) - 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO, right-left (need override) - 0x00b3: 0x0663, # 
ARABIC-INDIC DIGIT THREE, right-left (need override) - 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR, right-left (need override) - 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE, right-left (need override) - 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX, right-left (need override) - 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) - 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) - 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE, right-left (need override) - 0x00ba: 0x003a, # COLON, right-left - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: 0x003c, # LESS-THAN SIGN, right-left - 0x00bd: 0x003d, # EQUALS SIGN, right-left - 0x00be: 0x003e, # GREATER-THAN SIGN, right-left - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: 0x274a, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left - 0x00c1: 0x0621, # ARABIC LETTER HAMZA - 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x00c7: 0x0627, # ARABIC LETTER ALEF - 0x00c8: 0x0628, # ARABIC LETTER BEH - 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA - 0x00ca: 0x062a, # ARABIC LETTER TEH - 0x00cb: 0x062b, # ARABIC LETTER THEH - 0x00cc: 0x062c, # ARABIC LETTER JEEM - 0x00cd: 0x062d, # ARABIC LETTER HAH - 0x00ce: 0x062e, # ARABIC LETTER KHAH - 0x00cf: 0x062f, # ARABIC LETTER DAL - 0x00d0: 0x0630, # ARABIC LETTER THAL - 0x00d1: 0x0631, # ARABIC LETTER REH - 0x00d2: 0x0632, # ARABIC LETTER ZAIN - 0x00d3: 0x0633, # ARABIC LETTER SEEN - 0x00d4: 0x0634, # ARABIC LETTER SHEEN - 0x00d5: 0x0635, # ARABIC LETTER SAD - 0x00d6: 0x0636, # ARABIC LETTER DAD - 0x00d7: 0x0637, # ARABIC LETTER TAH - 0x00d8: 0x0638, # ARABIC LETTER ZAH - 0x00d9: 0x0639, # ARABIC LETTER AIN - 0x00da: 0x063a, # ARABIC LETTER GHAIN - 0x00db: 0x005b, # LEFT SQUARE BRACKET, right-left - 0x00dc: 0x005c, # REVERSE 
SOLIDUS, right-left - 0x00dd: 0x005d, # RIGHT SQUARE BRACKET, right-left - 0x00de: 0x005e, # CIRCUMFLEX ACCENT, right-left - 0x00df: 0x005f, # LOW LINE, right-left - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0x0641, # ARABIC LETTER FEH - 0x00e2: 0x0642, # ARABIC LETTER QAF - 0x00e3: 0x0643, # ARABIC LETTER KAF - 0x00e4: 0x0644, # ARABIC LETTER LAM - 0x00e5: 0x0645, # ARABIC LETTER MEEM - 0x00e6: 0x0646, # ARABIC LETTER NOON - 0x00e7: 0x0647, # ARABIC LETTER HEH - 0x00e8: 0x0648, # ARABIC LETTER WAW - 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA - 0x00ea: 0x064a, # ARABIC LETTER YEH - 0x00eb: 0x064b, # ARABIC FATHATAN - 0x00ec: 0x064c, # ARABIC DAMMATAN - 0x00ed: 0x064d, # ARABIC KASRATAN - 0x00ee: 0x064e, # ARABIC FATHA - 0x00ef: 0x064f, # ARABIC DAMMA - 0x00f0: 0x0650, # ARABIC KASRA - 0x00f1: 0x0651, # ARABIC SHADDA - 0x00f2: 0x0652, # ARABIC SUKUN - 0x00f3: 0x067e, # ARABIC LETTER PEH - 0x00f4: 0x0679, # ARABIC LETTER TTEH - 0x00f5: 0x0686, # ARABIC LETTER TCHEH - 0x00f6: 0x06d5, # ARABIC LETTER AE - 0x00f7: 0x06a4, # ARABIC LETTER VEH - 0x00f8: 0x06af, # ARABIC LETTER GAF - 0x00f9: 0x0688, # ARABIC LETTER DDAL - 0x00fa: 0x0691, # ARABIC LETTER RREH - 0x00fb: 0x007b, # LEFT CURLY BRACKET, right-left - 0x00fc: 0x007c, # VERTICAL LINE, right-left - 0x00fd: 0x007d, # RIGHT CURLY BRACKET, right-left - 0x00fe: 0x0698, # ARABIC LETTER JEH - 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00a0, # NO-BREAK SPACE, right-left + 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH 
DIAERESIS + 0x008b: 0x06ba, # ARABIC LETTER NOON GHUNNA + 0x008c: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x2026, # HORIZONTAL ELLIPSIS, right-left + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f7, # DIVISION SIGN, right-left + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x0020, # SPACE, right-left + 0x00a1: 0x0021, # EXCLAMATION MARK, right-left + 0x00a2: 0x0022, # QUOTATION MARK, right-left + 0x00a3: 0x0023, # NUMBER SIGN, right-left + 0x00a4: 0x0024, # DOLLAR SIGN, right-left + 0x00a5: 0x066a, # ARABIC PERCENT SIGN + 0x00a6: 0x0026, # AMPERSAND, right-left + 0x00a7: 0x0027, # APOSTROPHE, right-left + 0x00a8: 0x0028, # LEFT PARENTHESIS, right-left + 0x00a9: 0x0029, # RIGHT PARENTHESIS, right-left + 0x00aa: 0x002a, # ASTERISK, right-left + 0x00ab: 0x002b, # PLUS SIGN, right-left + 0x00ac: 0x060c, # ARABIC COMMA + 0x00ad: 0x002d, # HYPHEN-MINUS, right-left + 0x00ae: 0x002e, # FULL STOP, right-left + 0x00af: 0x002f, # SOLIDUS, right-left + 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO, right-left (need override) + 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE, right-left (need 
override) + 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO, right-left (need override) + 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE, right-left (need override) + 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR, right-left (need override) + 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE, right-left (need override) + 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX, right-left (need override) + 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) + 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) + 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE, right-left (need override) + 0x00ba: 0x003a, # COLON, right-left + 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: 0x003c, # LESS-THAN SIGN, right-left + 0x00bd: 0x003d, # EQUALS SIGN, right-left + 0x00be: 0x003e, # GREATER-THAN SIGN, right-left + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: 0x274a, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left + 0x00c1: 0x0621, # ARABIC LETTER HAMZA + 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x00c7: 0x0627, # ARABIC LETTER ALEF + 0x00c8: 0x0628, # ARABIC LETTER BEH + 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA + 0x00ca: 0x062a, # ARABIC LETTER TEH + 0x00cb: 0x062b, # ARABIC LETTER THEH + 0x00cc: 0x062c, # ARABIC LETTER JEEM + 0x00cd: 0x062d, # ARABIC LETTER HAH + 0x00ce: 0x062e, # ARABIC LETTER KHAH + 0x00cf: 0x062f, # ARABIC LETTER DAL + 0x00d0: 0x0630, # ARABIC LETTER THAL + 0x00d1: 0x0631, # ARABIC LETTER REH + 0x00d2: 0x0632, # ARABIC LETTER ZAIN + 0x00d3: 0x0633, # ARABIC LETTER SEEN + 0x00d4: 0x0634, # ARABIC LETTER SHEEN + 0x00d5: 0x0635, # ARABIC LETTER SAD + 0x00d6: 0x0636, # ARABIC LETTER DAD + 0x00d7: 0x0637, # ARABIC LETTER TAH + 0x00d8: 0x0638, # ARABIC LETTER ZAH + 0x00d9: 0x0639, # ARABIC LETTER AIN + 0x00da: 0x063a, # 
ARABIC LETTER GHAIN + 0x00db: 0x005b, # LEFT SQUARE BRACKET, right-left + 0x00dc: 0x005c, # REVERSE SOLIDUS, right-left + 0x00dd: 0x005d, # RIGHT SQUARE BRACKET, right-left + 0x00de: 0x005e, # CIRCUMFLEX ACCENT, right-left + 0x00df: 0x005f, # LOW LINE, right-left + 0x00e0: 0x0640, # ARABIC TATWEEL + 0x00e1: 0x0641, # ARABIC LETTER FEH + 0x00e2: 0x0642, # ARABIC LETTER QAF + 0x00e3: 0x0643, # ARABIC LETTER KAF + 0x00e4: 0x0644, # ARABIC LETTER LAM + 0x00e5: 0x0645, # ARABIC LETTER MEEM + 0x00e6: 0x0646, # ARABIC LETTER NOON + 0x00e7: 0x0647, # ARABIC LETTER HEH + 0x00e8: 0x0648, # ARABIC LETTER WAW + 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA + 0x00ea: 0x064a, # ARABIC LETTER YEH + 0x00eb: 0x064b, # ARABIC FATHATAN + 0x00ec: 0x064c, # ARABIC DAMMATAN + 0x00ed: 0x064d, # ARABIC KASRATAN + 0x00ee: 0x064e, # ARABIC FATHA + 0x00ef: 0x064f, # ARABIC DAMMA + 0x00f0: 0x0650, # ARABIC KASRA + 0x00f1: 0x0651, # ARABIC SHADDA + 0x00f2: 0x0652, # ARABIC SUKUN + 0x00f3: 0x067e, # ARABIC LETTER PEH + 0x00f4: 0x0679, # ARABIC LETTER TTEH + 0x00f5: 0x0686, # ARABIC LETTER TCHEH + 0x00f6: 0x06d5, # ARABIC LETTER AE + 0x00f7: 0x06a4, # ARABIC LETTER VEH + 0x00f8: 0x06af, # ARABIC LETTER GAF + 0x00f9: 0x0688, # ARABIC LETTER DDAL + 0x00fa: 0x0691, # ARABIC LETTER RREH + 0x00fb: 0x007b, # LEFT CURLY BRACKET, right-left + 0x00fc: 0x007c, # VERTICAL LINE, right-left + 0x00fd: 0x007d, # RIGHT CURLY BRACKET, right-left + 0x00fe: 0x0698, # ARABIC LETTER JEH + 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE }) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> CONTROL CHARACTER - u'\x01' # 0x0001 -> CONTROL CHARACTER - u'\x02' # 0x0002 -> CONTROL CHARACTER - u'\x03' # 0x0003 -> CONTROL CHARACTER - u'\x04' # 0x0004 -> CONTROL CHARACTER - u'\x05' # 0x0005 -> CONTROL CHARACTER - u'\x06' # 0x0006 -> CONTROL CHARACTER - u'\x07' # 0x0007 -> CONTROL CHARACTER - u'\x08' # 0x0008 -> CONTROL CHARACTER - u'\t' # 0x0009 -> CONTROL CHARACTER - u'\n' # 0x000a -> CONTROL CHARACTER - u'\x0b' # 0x000b 
-> CONTROL CHARACTER - u'\x0c' # 0x000c -> CONTROL CHARACTER - u'\r' # 0x000d -> CONTROL CHARACTER - u'\x0e' # 0x000e -> CONTROL CHARACTER - u'\x0f' # 0x000f -> CONTROL CHARACTER - u'\x10' # 0x0010 -> CONTROL CHARACTER - u'\x11' # 0x0011 -> CONTROL CHARACTER - u'\x12' # 0x0012 -> CONTROL CHARACTER - u'\x13' # 0x0013 -> CONTROL CHARACTER - u'\x14' # 0x0014 -> CONTROL CHARACTER - u'\x15' # 0x0015 -> CONTROL CHARACTER - u'\x16' # 0x0016 -> CONTROL CHARACTER - u'\x17' # 0x0017 -> CONTROL CHARACTER - u'\x18' # 0x0018 -> CONTROL CHARACTER - u'\x19' # 0x0019 -> CONTROL CHARACTER - u'\x1a' # 0x001a -> CONTROL CHARACTER - u'\x1b' # 0x001b -> CONTROL CHARACTER - u'\x1c' # 0x001c -> CONTROL CHARACTER - u'\x1d' # 0x001d -> CONTROL CHARACTER - u'\x1e' # 0x001e -> CONTROL CHARACTER - u'\x1f' # 0x001f -> CONTROL CHARACTER - u' ' # 0x0020 -> SPACE, left-right - u'!' # 0x0021 -> EXCLAMATION MARK, left-right - u'"' # 0x0022 -> QUOTATION MARK, left-right - u'#' # 0x0023 -> NUMBER SIGN, left-right - u'$' # 0x0024 -> DOLLAR SIGN, left-right - u'%' # 0x0025 -> PERCENT SIGN, left-right - u'&' # 0x0026 -> AMPERSAND, left-right - u"'" # 0x0027 -> APOSTROPHE, left-right - u'(' # 0x0028 -> LEFT PARENTHESIS, left-right - u')' # 0x0029 -> RIGHT PARENTHESIS, left-right - u'*' # 0x002a -> ASTERISK, left-right - u'+' # 0x002b -> PLUS SIGN, left-right - u',' # 0x002c -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - u'-' # 0x002d -> HYPHEN-MINUS, left-right - u'.' 
# 0x002e -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - u'/' # 0x002f -> SOLIDUS, left-right - u'0' # 0x0030 -> DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE - u':' # 0x003a -> COLON, left-right - u';' # 0x003b -> SEMICOLON, left-right - u'<' # 0x003c -> LESS-THAN SIGN, left-right - u'=' # 0x003d -> EQUALS SIGN, left-right - u'>' # 0x003e -> GREATER-THAN SIGN, left-right - u'?' 
# 0x003f -> QUESTION MARK, left-right - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET, left-right - u'\\' # 0x005c -> REVERSE SOLIDUS, left-right - u']' # 0x005d -> RIGHT SQUARE BRACKET, left-right - u'^' # 0x005e -> CIRCUMFLEX ACCENT, left-right - u'_' # 0x005f -> LOW LINE, left-right - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - 
u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET, left-right - u'|' # 0x007c -> VERTICAL LINE, left-right - u'}' # 0x007d -> RIGHT CURLY BRACKET, left-right - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> CONTROL CHARACTER - u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xa0' # 0x0081 -> NO-BREAK SPACE, right-left - u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u06ba' # 0x008b -> ARABIC LETTER NOON GHUNNA - u'\xab' # 0x008c -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE - u'\u2026' # 0x0093 -> HORIZONTAL ELLIPSIS, right-left - u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH 
TILDE - u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE - u'\xbb' # 0x0098 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0x009b -> DIVISION SIGN, right-left - u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS - u' ' # 0x00a0 -> SPACE, right-left - u'!' # 0x00a1 -> EXCLAMATION MARK, right-left - u'"' # 0x00a2 -> QUOTATION MARK, right-left - u'#' # 0x00a3 -> NUMBER SIGN, right-left - u'$' # 0x00a4 -> DOLLAR SIGN, right-left - u'\u066a' # 0x00a5 -> ARABIC PERCENT SIGN - u'&' # 0x00a6 -> AMPERSAND, right-left - u"'" # 0x00a7 -> APOSTROPHE, right-left - u'(' # 0x00a8 -> LEFT PARENTHESIS, right-left - u')' # 0x00a9 -> RIGHT PARENTHESIS, right-left - u'*' # 0x00aa -> ASTERISK, right-left - u'+' # 0x00ab -> PLUS SIGN, right-left - u'\u060c' # 0x00ac -> ARABIC COMMA - u'-' # 0x00ad -> HYPHEN-MINUS, right-left - u'.' 
# 0x00ae -> FULL STOP, right-left - u'/' # 0x00af -> SOLIDUS, right-left - u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO, right-left (need override) - u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE, right-left (need override) - u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO, right-left (need override) - u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE, right-left (need override) - u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR, right-left (need override) - u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE, right-left (need override) - u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX, right-left (need override) - u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN, right-left (need override) - u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT, right-left (need override) - u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE, right-left (need override) - u':' # 0x00ba -> COLON, right-left - u'\u061b' # 0x00bb -> ARABIC SEMICOLON - u'<' # 0x00bc -> LESS-THAN SIGN, right-left - u'=' # 0x00bd -> EQUALS SIGN, right-left - u'>' # 0x00be -> GREATER-THAN SIGN, right-left - u'\u061f' # 0x00bf -> ARABIC QUESTION MARK - u'\u274a' # 0x00c0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left - u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA - u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF - u'\u0628' # 0x00c8 -> ARABIC LETTER BEH - u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0x00ca -> ARABIC LETTER TEH - u'\u062b' # 0x00cb -> ARABIC LETTER THEH - u'\u062c' # 0x00cc -> ARABIC LETTER JEEM - u'\u062d' # 0x00cd -> ARABIC LETTER HAH - u'\u062e' # 0x00ce -> ARABIC LETTER KHAH - u'\u062f' # 0x00cf -> ARABIC LETTER DAL - u'\u0630' # 0x00d0 -> ARABIC LETTER THAL - u'\u0631' # 0x00d1 -> ARABIC LETTER REH - u'\u0632' 
# 0x00d2 -> ARABIC LETTER ZAIN - u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN - u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN - u'\u0635' # 0x00d5 -> ARABIC LETTER SAD - u'\u0636' # 0x00d6 -> ARABIC LETTER DAD - u'\u0637' # 0x00d7 -> ARABIC LETTER TAH - u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH - u'\u0639' # 0x00d9 -> ARABIC LETTER AIN - u'\u063a' # 0x00da -> ARABIC LETTER GHAIN - u'[' # 0x00db -> LEFT SQUARE BRACKET, right-left - u'\\' # 0x00dc -> REVERSE SOLIDUS, right-left - u']' # 0x00dd -> RIGHT SQUARE BRACKET, right-left - u'^' # 0x00de -> CIRCUMFLEX ACCENT, right-left - u'_' # 0x00df -> LOW LINE, right-left - u'\u0640' # 0x00e0 -> ARABIC TATWEEL - u'\u0641' # 0x00e1 -> ARABIC LETTER FEH - u'\u0642' # 0x00e2 -> ARABIC LETTER QAF - u'\u0643' # 0x00e3 -> ARABIC LETTER KAF - u'\u0644' # 0x00e4 -> ARABIC LETTER LAM - u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM - u'\u0646' # 0x00e6 -> ARABIC LETTER NOON - u'\u0647' # 0x00e7 -> ARABIC LETTER HEH - u'\u0648' # 0x00e8 -> ARABIC LETTER WAW - u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0x00ea -> ARABIC LETTER YEH - u'\u064b' # 0x00eb -> ARABIC FATHATAN - u'\u064c' # 0x00ec -> ARABIC DAMMATAN - u'\u064d' # 0x00ed -> ARABIC KASRATAN - u'\u064e' # 0x00ee -> ARABIC FATHA - u'\u064f' # 0x00ef -> ARABIC DAMMA - u'\u0650' # 0x00f0 -> ARABIC KASRA - u'\u0651' # 0x00f1 -> ARABIC SHADDA - u'\u0652' # 0x00f2 -> ARABIC SUKUN - u'\u067e' # 0x00f3 -> ARABIC LETTER PEH - u'\u0679' # 0x00f4 -> ARABIC LETTER TTEH - u'\u0686' # 0x00f5 -> ARABIC LETTER TCHEH - u'\u06d5' # 0x00f6 -> ARABIC LETTER AE - u'\u06a4' # 0x00f7 -> ARABIC LETTER VEH - u'\u06af' # 0x00f8 -> ARABIC LETTER GAF - u'\u0688' # 0x00f9 -> ARABIC LETTER DDAL - u'\u0691' # 0x00fa -> ARABIC LETTER RREH - u'{' # 0x00fb -> LEFT CURLY BRACKET, right-left - u'|' # 0x00fc -> VERTICAL LINE, right-left - u'}' # 0x00fd -> RIGHT CURLY BRACKET, right-left - u'\u0698' # 0x00fe -> ARABIC LETTER JEH - u'\u06d2' # 0x00ff -> ARABIC LETTER YEH BARREE + u'\x00' # 0x0000 -> CONTROL 
CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 0x000a -> CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE, left-right + u'!' # 0x0021 -> EXCLAMATION MARK, left-right + u'"' # 0x0022 -> QUOTATION MARK, left-right + u'#' # 0x0023 -> NUMBER SIGN, left-right + u'$' # 0x0024 -> DOLLAR SIGN, left-right + u'%' # 0x0025 -> PERCENT SIGN, left-right + u'&' # 0x0026 -> AMPERSAND, left-right + u"'" # 0x0027 -> APOSTROPHE, left-right + u'(' # 0x0028 -> LEFT PARENTHESIS, left-right + u')' # 0x0029 -> RIGHT PARENTHESIS, left-right + u'*' # 0x002a -> ASTERISK, left-right + u'+' # 0x002b -> PLUS SIGN, left-right + u',' # 0x002c -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR + u'-' # 0x002d -> HYPHEN-MINUS, left-right + u'.' 
# 0x002e -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR + u'/' # 0x002f -> SOLIDUS, left-right + u'0' # 0x0030 -> DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE + u':' # 0x003a -> COLON, left-right + u';' # 0x003b -> SEMICOLON, left-right + u'<' # 0x003c -> LESS-THAN SIGN, left-right + u'=' # 0x003d -> EQUALS SIGN, left-right + u'>' # 0x003e -> GREATER-THAN SIGN, left-right + u'?' 
# 0x003f -> QUESTION MARK, left-right + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET, left-right + u'\\' # 0x005c -> REVERSE SOLIDUS, left-right + u']' # 0x005d -> RIGHT SQUARE BRACKET, left-right + u'^' # 0x005e -> CIRCUMFLEX ACCENT, left-right + u'_' # 0x005f -> LOW LINE, left-right + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + 
u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET, left-right + u'|' # 0x007c -> VERTICAL LINE, left-right + u'}' # 0x007d -> RIGHT CURLY BRACKET, left-right + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xa0' # 0x0081 -> NO-BREAK SPACE, right-left + u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u06ba' # 0x008b -> ARABIC LETTER NOON GHUNNA + u'\xab' # 0x008c -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE + u'\u2026' # 0x0093 -> HORIZONTAL ELLIPSIS, right-left + u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH 
TILDE + u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE + u'\xbb' # 0x0098 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x009b -> DIVISION SIGN, right-left + u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS + u' ' # 0x00a0 -> SPACE, right-left + u'!' # 0x00a1 -> EXCLAMATION MARK, right-left + u'"' # 0x00a2 -> QUOTATION MARK, right-left + u'#' # 0x00a3 -> NUMBER SIGN, right-left + u'$' # 0x00a4 -> DOLLAR SIGN, right-left + u'\u066a' # 0x00a5 -> ARABIC PERCENT SIGN + u'&' # 0x00a6 -> AMPERSAND, right-left + u"'" # 0x00a7 -> APOSTROPHE, right-left + u'(' # 0x00a8 -> LEFT PARENTHESIS, right-left + u')' # 0x00a9 -> RIGHT PARENTHESIS, right-left + u'*' # 0x00aa -> ASTERISK, right-left + u'+' # 0x00ab -> PLUS SIGN, right-left + u'\u060c' # 0x00ac -> ARABIC COMMA + u'-' # 0x00ad -> HYPHEN-MINUS, right-left + u'.' 
# 0x00ae -> FULL STOP, right-left + u'/' # 0x00af -> SOLIDUS, right-left + u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO, right-left (need override) + u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE, right-left (need override) + u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO, right-left (need override) + u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE, right-left (need override) + u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR, right-left (need override) + u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE, right-left (need override) + u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX, right-left (need override) + u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN, right-left (need override) + u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT, right-left (need override) + u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE, right-left (need override) + u':' # 0x00ba -> COLON, right-left + u'\u061b' # 0x00bb -> ARABIC SEMICOLON + u'<' # 0x00bc -> LESS-THAN SIGN, right-left + u'=' # 0x00bd -> EQUALS SIGN, right-left + u'>' # 0x00be -> GREATER-THAN SIGN, right-left + u'\u061f' # 0x00bf -> ARABIC QUESTION MARK + u'\u274a' # 0x00c0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left + u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA + u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF + u'\u0628' # 0x00c8 -> ARABIC LETTER BEH + u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0x00ca -> ARABIC LETTER TEH + u'\u062b' # 0x00cb -> ARABIC LETTER THEH + u'\u062c' # 0x00cc -> ARABIC LETTER JEEM + u'\u062d' # 0x00cd -> ARABIC LETTER HAH + u'\u062e' # 0x00ce -> ARABIC LETTER KHAH + u'\u062f' # 0x00cf -> ARABIC LETTER DAL + u'\u0630' # 0x00d0 -> ARABIC LETTER THAL + u'\u0631' # 0x00d1 -> ARABIC LETTER REH + u'\u0632' 
# 0x00d2 -> ARABIC LETTER ZAIN + u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN + u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN + u'\u0635' # 0x00d5 -> ARABIC LETTER SAD + u'\u0636' # 0x00d6 -> ARABIC LETTER DAD + u'\u0637' # 0x00d7 -> ARABIC LETTER TAH + u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH + u'\u0639' # 0x00d9 -> ARABIC LETTER AIN + u'\u063a' # 0x00da -> ARABIC LETTER GHAIN + u'[' # 0x00db -> LEFT SQUARE BRACKET, right-left + u'\\' # 0x00dc -> REVERSE SOLIDUS, right-left + u']' # 0x00dd -> RIGHT SQUARE BRACKET, right-left + u'^' # 0x00de -> CIRCUMFLEX ACCENT, right-left + u'_' # 0x00df -> LOW LINE, right-left + u'\u0640' # 0x00e0 -> ARABIC TATWEEL + u'\u0641' # 0x00e1 -> ARABIC LETTER FEH + u'\u0642' # 0x00e2 -> ARABIC LETTER QAF + u'\u0643' # 0x00e3 -> ARABIC LETTER KAF + u'\u0644' # 0x00e4 -> ARABIC LETTER LAM + u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM + u'\u0646' # 0x00e6 -> ARABIC LETTER NOON + u'\u0647' # 0x00e7 -> ARABIC LETTER HEH + u'\u0648' # 0x00e8 -> ARABIC LETTER WAW + u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0x00ea -> ARABIC LETTER YEH + u'\u064b' # 0x00eb -> ARABIC FATHATAN + u'\u064c' # 0x00ec -> ARABIC DAMMATAN + u'\u064d' # 0x00ed -> ARABIC KASRATAN + u'\u064e' # 0x00ee -> ARABIC FATHA + u'\u064f' # 0x00ef -> ARABIC DAMMA + u'\u0650' # 0x00f0 -> ARABIC KASRA + u'\u0651' # 0x00f1 -> ARABIC SHADDA + u'\u0652' # 0x00f2 -> ARABIC SUKUN + u'\u067e' # 0x00f3 -> ARABIC LETTER PEH + u'\u0679' # 0x00f4 -> ARABIC LETTER TTEH + u'\u0686' # 0x00f5 -> ARABIC LETTER TCHEH + u'\u06d5' # 0x00f6 -> ARABIC LETTER AE + u'\u06a4' # 0x00f7 -> ARABIC LETTER VEH + u'\u06af' # 0x00f8 -> ARABIC LETTER GAF + u'\u0688' # 0x00f9 -> ARABIC LETTER DDAL + u'\u0691' # 0x00fa -> ARABIC LETTER RREH + u'{' # 0x00fb -> LEFT CURLY BRACKET, right-left + u'|' # 0x00fc -> VERTICAL LINE, right-left + u'}' # 0x00fd -> RIGHT CURLY BRACKET, right-left + u'\u0698' # 0x00fe -> ARABIC LETTER JEH + u'\u06d2' # 0x00ff -> ARABIC LETTER YEH BARREE ) ### Encoding Map encoding_map = 
{ - 0x0000: 0x0000, # CONTROL CHARACTER - 0x0001: 0x0001, # CONTROL CHARACTER - 0x0002: 0x0002, # CONTROL CHARACTER - 0x0003: 0x0003, # CONTROL CHARACTER - 0x0004: 0x0004, # CONTROL CHARACTER - 0x0005: 0x0005, # CONTROL CHARACTER - 0x0006: 0x0006, # CONTROL CHARACTER - 0x0007: 0x0007, # CONTROL CHARACTER - 0x0008: 0x0008, # CONTROL CHARACTER - 0x0009: 0x0009, # CONTROL CHARACTER - 0x000a: 0x000a, # CONTROL CHARACTER - 0x000b: 0x000b, # CONTROL CHARACTER - 0x000c: 0x000c, # CONTROL CHARACTER - 0x000d: 0x000d, # CONTROL CHARACTER - 0x000e: 0x000e, # CONTROL CHARACTER - 0x000f: 0x000f, # CONTROL CHARACTER - 0x0010: 0x0010, # CONTROL CHARACTER - 0x0011: 0x0011, # CONTROL CHARACTER - 0x0012: 0x0012, # CONTROL CHARACTER - 0x0013: 0x0013, # CONTROL CHARACTER - 0x0014: 0x0014, # CONTROL CHARACTER - 0x0015: 0x0015, # CONTROL CHARACTER - 0x0016: 0x0016, # CONTROL CHARACTER - 0x0017: 0x0017, # CONTROL CHARACTER - 0x0018: 0x0018, # CONTROL CHARACTER - 0x0019: 0x0019, # CONTROL CHARACTER - 0x001a: 0x001a, # CONTROL CHARACTER - 0x001b: 0x001b, # CONTROL CHARACTER - 0x001c: 0x001c, # CONTROL CHARACTER - 0x001d: 0x001d, # CONTROL CHARACTER - 0x001e: 0x001e, # CONTROL CHARACTER - 0x001f: 0x001f, # CONTROL CHARACTER - 0x0020: 0x0020, # SPACE, left-right - 0x0020: 0x00a0, # SPACE, right-left - 0x0021: 0x0021, # EXCLAMATION MARK, left-right - 0x0021: 0x00a1, # EXCLAMATION MARK, right-left - 0x0022: 0x0022, # QUOTATION MARK, left-right - 0x0022: 0x00a2, # QUOTATION MARK, right-left - 0x0023: 0x0023, # NUMBER SIGN, left-right - 0x0023: 0x00a3, # NUMBER SIGN, right-left - 0x0024: 0x0024, # DOLLAR SIGN, left-right - 0x0024: 0x00a4, # DOLLAR SIGN, right-left - 0x0025: 0x0025, # PERCENT SIGN, left-right - 0x0026: 0x0026, # AMPERSAND, left-right - 0x0026: 0x00a6, # AMPERSAND, right-left - 0x0027: 0x0027, # APOSTROPHE, left-right - 0x0027: 0x00a7, # APOSTROPHE, right-left - 0x0028: 0x0028, # LEFT PARENTHESIS, left-right - 0x0028: 0x00a8, # LEFT PARENTHESIS, right-left - 0x0029: 0x0029, # 
RIGHT PARENTHESIS, left-right - 0x0029: 0x00a9, # RIGHT PARENTHESIS, right-left - 0x002a: 0x002a, # ASTERISK, left-right - 0x002a: 0x00aa, # ASTERISK, right-left - 0x002b: 0x002b, # PLUS SIGN, left-right - 0x002b: 0x00ab, # PLUS SIGN, right-left - 0x002c: 0x002c, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - 0x002d: 0x002d, # HYPHEN-MINUS, left-right - 0x002d: 0x00ad, # HYPHEN-MINUS, right-left - 0x002e: 0x002e, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - 0x002e: 0x00ae, # FULL STOP, right-left - 0x002f: 0x002f, # SOLIDUS, left-right - 0x002f: 0x00af, # SOLIDUS, right-left - 0x0030: 0x0030, # DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE - 0x003a: 0x003a, # COLON, left-right - 0x003a: 0x00ba, # COLON, right-left - 0x003b: 0x003b, # SEMICOLON, left-right - 0x003c: 0x003c, # LESS-THAN SIGN, left-right - 0x003c: 0x00bc, # LESS-THAN SIGN, right-left - 0x003d: 0x003d, # EQUALS SIGN, left-right - 0x003d: 0x00bd, # EQUALS SIGN, 
right-left - 0x003e: 0x003e, # GREATER-THAN SIGN, left-right - 0x003e: 0x00be, # GREATER-THAN SIGN, right-left - 0x003f: 0x003f, # QUESTION MARK, left-right - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET, left-right - 0x005b: 0x00db, # LEFT SQUARE BRACKET, right-left - 0x005c: 0x005c, # REVERSE SOLIDUS, left-right - 0x005c: 0x00dc, # REVERSE SOLIDUS, right-left - 0x005d: 0x005d, # RIGHT SQUARE BRACKET, left-right - 0x005d: 0x00dd, # RIGHT SQUARE BRACKET, right-left - 0x005e: 0x005e, # CIRCUMFLEX ACCENT, left-right - 0x005e: 0x00de, # CIRCUMFLEX ACCENT, right-left - 0x005f: 0x005f, # LOW LINE, left-right - 0x005f: 0x00df, # LOW LINE, right-left - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 
0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET, left-right - 0x007b: 0x00fb, # LEFT CURLY BRACKET, right-left - 0x007c: 0x007c, # VERTICAL LINE, left-right - 0x007c: 0x00fc, # VERTICAL LINE, right-left - 0x007d: 0x007d, # RIGHT CURLY BRACKET, left-right - 0x007d: 0x00fd, # RIGHT CURLY BRACKET, right-left - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # CONTROL CHARACTER - 0x00a0: 0x0081, # NO-BREAK SPACE, right-left - 0x00ab: 0x008c, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00bb: 0x0098, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x008a, # LATIN SMALL LETTER A 
WITH DIAERESIS - 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE - 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x009b, # DIVISION SIGN, right-left - 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS - 0x060c: 0x00ac, # ARABIC COMMA - 0x061b: 0x00bb, # ARABIC SEMICOLON - 0x061f: 0x00bf, # ARABIC QUESTION MARK - 0x0621: 0x00c1, # ARABIC LETTER HAMZA - 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0x00c7, # ARABIC LETTER ALEF - 0x0628: 0x00c8, # ARABIC LETTER BEH - 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA - 0x062a: 0x00ca, # ARABIC LETTER TEH - 0x062b: 0x00cb, # ARABIC LETTER THEH - 0x062c: 0x00cc, # ARABIC LETTER JEEM - 0x062d: 0x00cd, # ARABIC LETTER HAH - 0x062e: 0x00ce, # ARABIC LETTER KHAH - 0x062f: 0x00cf, # ARABIC LETTER DAL - 0x0630: 0x00d0, # ARABIC LETTER THAL - 0x0631: 0x00d1, # ARABIC LETTER REH - 0x0632: 0x00d2, # ARABIC LETTER ZAIN - 0x0633: 0x00d3, # ARABIC LETTER SEEN - 0x0634: 0x00d4, # ARABIC LETTER SHEEN - 0x0635: 0x00d5, # ARABIC LETTER SAD - 0x0636: 0x00d6, # ARABIC LETTER DAD - 0x0637: 
0x00d7, # ARABIC LETTER TAH - 0x0638: 0x00d8, # ARABIC LETTER ZAH - 0x0639: 0x00d9, # ARABIC LETTER AIN - 0x063a: 0x00da, # ARABIC LETTER GHAIN - 0x0640: 0x00e0, # ARABIC TATWEEL - 0x0641: 0x00e1, # ARABIC LETTER FEH - 0x0642: 0x00e2, # ARABIC LETTER QAF - 0x0643: 0x00e3, # ARABIC LETTER KAF - 0x0644: 0x00e4, # ARABIC LETTER LAM - 0x0645: 0x00e5, # ARABIC LETTER MEEM - 0x0646: 0x00e6, # ARABIC LETTER NOON - 0x0647: 0x00e7, # ARABIC LETTER HEH - 0x0648: 0x00e8, # ARABIC LETTER WAW - 0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA - 0x064a: 0x00ea, # ARABIC LETTER YEH - 0x064b: 0x00eb, # ARABIC FATHATAN - 0x064c: 0x00ec, # ARABIC DAMMATAN - 0x064d: 0x00ed, # ARABIC KASRATAN - 0x064e: 0x00ee, # ARABIC FATHA - 0x064f: 0x00ef, # ARABIC DAMMA - 0x0650: 0x00f0, # ARABIC KASRA - 0x0651: 0x00f1, # ARABIC SHADDA - 0x0652: 0x00f2, # ARABIC SUKUN - 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO, right-left (need override) - 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE, right-left (need override) - 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO, right-left (need override) - 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE, right-left (need override) - 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR, right-left (need override) - 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE, right-left (need override) - 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX, right-left (need override) - 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) - 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) - 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE, right-left (need override) - 0x066a: 0x00a5, # ARABIC PERCENT SIGN - 0x0679: 0x00f4, # ARABIC LETTER TTEH - 0x067e: 0x00f3, # ARABIC LETTER PEH - 0x0686: 0x00f5, # ARABIC LETTER TCHEH - 0x0688: 0x00f9, # ARABIC LETTER DDAL - 0x0691: 0x00fa, # ARABIC LETTER RREH - 0x0698: 0x00fe, # ARABIC LETTER JEH - 0x06a4: 0x00f7, # ARABIC LETTER VEH - 0x06af: 0x00f8, # ARABIC LETTER GAF - 0x06ba: 0x008b, # ARABIC LETTER NOON GHUNNA - 0x06d2: 0x00ff, # ARABIC LETTER YEH 
BARREE - 0x06d5: 0x00f6, # ARABIC LETTER AE - 0x2026: 0x0093, # HORIZONTAL ELLIPSIS, right-left - 0x274a: 0x00c0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left -} \ No newline at end of file + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 0x0011: 0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER + 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, # CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE, left-right + 0x0020: 0x00a0, # SPACE, right-left + 0x0021: 0x0021, # EXCLAMATION MARK, left-right + 0x0021: 0x00a1, # EXCLAMATION MARK, right-left + 0x0022: 0x0022, # QUOTATION MARK, left-right + 0x0022: 0x00a2, # QUOTATION MARK, right-left + 0x0023: 0x0023, # NUMBER SIGN, left-right + 0x0023: 0x00a3, # NUMBER SIGN, right-left + 0x0024: 0x0024, # DOLLAR SIGN, left-right + 0x0024: 0x00a4, # DOLLAR SIGN, right-left + 0x0025: 0x0025, # PERCENT SIGN, left-right + 0x0026: 0x0026, # AMPERSAND, left-right + 0x0026: 0x00a6, # AMPERSAND, right-left + 0x0027: 
0x0027, # APOSTROPHE, left-right + 0x0027: 0x00a7, # APOSTROPHE, right-left + 0x0028: 0x0028, # LEFT PARENTHESIS, left-right + 0x0028: 0x00a8, # LEFT PARENTHESIS, right-left + 0x0029: 0x0029, # RIGHT PARENTHESIS, left-right + 0x0029: 0x00a9, # RIGHT PARENTHESIS, right-left + 0x002a: 0x002a, # ASTERISK, left-right + 0x002a: 0x00aa, # ASTERISK, right-left + 0x002b: 0x002b, # PLUS SIGN, left-right + 0x002b: 0x00ab, # PLUS SIGN, right-left + 0x002c: 0x002c, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR + 0x002d: 0x002d, # HYPHEN-MINUS, left-right + 0x002d: 0x00ad, # HYPHEN-MINUS, right-left + 0x002e: 0x002e, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR + 0x002e: 0x00ae, # FULL STOP, right-left + 0x002f: 0x002f, # SOLIDUS, left-right + 0x002f: 0x00af, # SOLIDUS, right-left + 0x0030: 0x0030, # DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE + 0x003a: 0x003a, # COLON, left-right + 0x003a: 0x00ba, # COLON, right-left + 0x003b: 0x003b, # 
SEMICOLON, left-right + 0x003c: 0x003c, # LESS-THAN SIGN, left-right + 0x003c: 0x00bc, # LESS-THAN SIGN, right-left + 0x003d: 0x003d, # EQUALS SIGN, left-right + 0x003d: 0x00bd, # EQUALS SIGN, right-left + 0x003e: 0x003e, # GREATER-THAN SIGN, left-right + 0x003e: 0x00be, # GREATER-THAN SIGN, right-left + 0x003f: 0x003f, # QUESTION MARK, left-right + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET, left-right + 0x005b: 0x00db, # LEFT SQUARE BRACKET, right-left + 0x005c: 0x005c, # REVERSE SOLIDUS, left-right + 0x005c: 0x00dc, # REVERSE SOLIDUS, right-left + 0x005d: 0x005d, # RIGHT SQUARE BRACKET, left-right + 0x005d: 0x00dd, # RIGHT SQUARE BRACKET, right-left + 0x005e: 0x005e, # CIRCUMFLEX ACCENT, left-right + 0x005e: 0x00de, # CIRCUMFLEX ACCENT, right-left + 0x005f: 0x005f, # LOW LINE, left-right + 0x005f: 0x00df, # LOW LINE, right-left + 0x0060: 
0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET, left-right + 0x007b: 0x00fb, # LEFT CURLY BRACKET, right-left + 0x007c: 0x007c, # VERTICAL LINE, left-right + 0x007c: 0x00fc, # VERTICAL LINE, right-left + 0x007d: 0x007d, # RIGHT CURLY BRACKET, left-right + 0x007d: 0x00fd, # RIGHT CURLY BRACKET, right-left + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL CHARACTER + 0x00a0: 0x0081, # NO-BREAK SPACE, right-left + 0x00ab: 0x008c, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x00bb: 0x0098, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00e0: 
0x0088, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE + 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x009b, # DIVISION SIGN, right-left + 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS + 0x060c: 0x00ac, # ARABIC COMMA + 0x061b: 0x00bb, # ARABIC SEMICOLON + 0x061f: 0x00bf, # ARABIC QUESTION MARK + 0x0621: 0x00c1, # ARABIC LETTER HAMZA + 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0x00c7, # ARABIC LETTER ALEF + 0x0628: 0x00c8, # ARABIC LETTER BEH + 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA + 0x062a: 0x00ca, # ARABIC LETTER TEH + 0x062b: 0x00cb, # ARABIC LETTER THEH + 0x062c: 0x00cc, # ARABIC LETTER JEEM + 0x062d: 0x00cd, # ARABIC LETTER HAH + 0x062e: 0x00ce, # ARABIC LETTER KHAH + 0x062f: 0x00cf, # ARABIC LETTER DAL + 0x0630: 0x00d0, # ARABIC LETTER THAL + 0x0631: 0x00d1, # ARABIC LETTER REH + 0x0632: 
0x00d2, # ARABIC LETTER ZAIN + 0x0633: 0x00d3, # ARABIC LETTER SEEN + 0x0634: 0x00d4, # ARABIC LETTER SHEEN + 0x0635: 0x00d5, # ARABIC LETTER SAD + 0x0636: 0x00d6, # ARABIC LETTER DAD + 0x0637: 0x00d7, # ARABIC LETTER TAH + 0x0638: 0x00d8, # ARABIC LETTER ZAH + 0x0639: 0x00d9, # ARABIC LETTER AIN + 0x063a: 0x00da, # ARABIC LETTER GHAIN + 0x0640: 0x00e0, # ARABIC TATWEEL + 0x0641: 0x00e1, # ARABIC LETTER FEH + 0x0642: 0x00e2, # ARABIC LETTER QAF + 0x0643: 0x00e3, # ARABIC LETTER KAF + 0x0644: 0x00e4, # ARABIC LETTER LAM + 0x0645: 0x00e5, # ARABIC LETTER MEEM + 0x0646: 0x00e6, # ARABIC LETTER NOON + 0x0647: 0x00e7, # ARABIC LETTER HEH + 0x0648: 0x00e8, # ARABIC LETTER WAW + 0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA + 0x064a: 0x00ea, # ARABIC LETTER YEH + 0x064b: 0x00eb, # ARABIC FATHATAN + 0x064c: 0x00ec, # ARABIC DAMMATAN + 0x064d: 0x00ed, # ARABIC KASRATAN + 0x064e: 0x00ee, # ARABIC FATHA + 0x064f: 0x00ef, # ARABIC DAMMA + 0x0650: 0x00f0, # ARABIC KASRA + 0x0651: 0x00f1, # ARABIC SHADDA + 0x0652: 0x00f2, # ARABIC SUKUN + 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO, right-left (need override) + 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE, right-left (need override) + 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO, right-left (need override) + 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE, right-left (need override) + 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR, right-left (need override) + 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE, right-left (need override) + 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX, right-left (need override) + 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) + 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) + 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE, right-left (need override) + 0x066a: 0x00a5, # ARABIC PERCENT SIGN + 0x0679: 0x00f4, # ARABIC LETTER TTEH + 0x067e: 0x00f3, # ARABIC LETTER PEH + 0x0686: 0x00f5, # ARABIC LETTER TCHEH + 0x0688: 0x00f9, # ARABIC LETTER DDAL + 0x0691: 0x00fa, # ARABIC LETTER RREH + 
0x0698: 0x00fe, # ARABIC LETTER JEH + 0x06a4: 0x00f7, # ARABIC LETTER VEH + 0x06af: 0x00f8, # ARABIC LETTER GAF + 0x06ba: 0x008b, # ARABIC LETTER NOON GHUNNA + 0x06d2: 0x00ff, # ARABIC LETTER YEH BARREE + 0x06d5: 0x00f6, # ARABIC LETTER AE + 0x2026: 0x0093, # HORIZONTAL ELLIPSIS, right-left + 0x274a: 0x00c0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left +} Modified: python/branches/ssize_t/Lib/encodings/mac_centeuro.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/mac_centeuro.py (original) +++ python/branches/ssize_t/Lib/encodings/mac_centeuro.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' 
# 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN 
CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0100' # 0x81 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u0101' # 0x82 -> LATIN SMALL LETTER A WITH MACRON - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0104' # 0x84 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\u0105' # 0x88 -> LATIN SMALL LETTER A WITH OGONEK - u'\u010c' # 0x89 -> LATIN CAPITAL LETTER C 
WITH CARON - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u010d' # 0x8B -> LATIN SMALL LETTER C WITH CARON - u'\u0106' # 0x8C -> LATIN CAPITAL LETTER C WITH ACUTE - u'\u0107' # 0x8D -> LATIN SMALL LETTER C WITH ACUTE - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\u017a' # 0x90 -> LATIN SMALL LETTER Z WITH ACUTE - u'\u010e' # 0x91 -> LATIN CAPITAL LETTER D WITH CARON - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\u010f' # 0x93 -> LATIN SMALL LETTER D WITH CARON - u'\u0112' # 0x94 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0113' # 0x95 -> LATIN SMALL LETTER E WITH MACRON - u'\u0116' # 0x96 -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\u0117' # 0x98 -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\u011a' # 0x9D -> LATIN CAPITAL LETTER E WITH CARON - u'\u011b' # 0x9E -> LATIN SMALL LETTER E WITH CARON - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\u0118' # 0xA2 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\u0119' # 0xAB -> LATIN SMALL LETTER E WITH OGONEK - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u0123' # 0xAE -> LATIN SMALL LETTER G WITH CEDILLA - u'\u012e' # 0xAF -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u012f' # 0xB0 -> LATIN SMALL LETTER I WITH OGONEK - u'\u012a' # 0xB1 -> LATIN CAPITAL LETTER I WITH MACRON - u'\u2264' # 0xB2 -> LESS-THAN OR 
EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON - u'\u0136' # 0xB5 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u0142' # 0xB8 -> LATIN SMALL LETTER L WITH STROKE - u'\u013b' # 0xB9 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u013c' # 0xBA -> LATIN SMALL LETTER L WITH CEDILLA - u'\u013d' # 0xBB -> LATIN CAPITAL LETTER L WITH CARON - u'\u013e' # 0xBC -> LATIN SMALL LETTER L WITH CARON - u'\u0139' # 0xBD -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u013a' # 0xBE -> LATIN SMALL LETTER L WITH ACUTE - u'\u0145' # 0xBF -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u0146' # 0xC0 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u0143' # 0xC1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0144' # 0xC4 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0147' # 0xC5 -> LATIN CAPITAL LETTER N WITH CARON - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\u0148' # 0xCB -> LATIN SMALL LETTER N WITH CARON - u'\u0150' # 0xCC -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0151' # 0xCE -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\u014c' # 0xCF -> LATIN CAPITAL LETTER O WITH MACRON - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\u014d' # 0xD8 -> LATIN SMALL LETTER O WITH MACRON - u'\u0154' # 0xD9 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\u0155' # 0xDA -> LATIN SMALL LETTER R WITH ACUTE - 
u'\u0158' # 0xDB -> LATIN CAPITAL LETTER R WITH CARON - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0159' # 0xDE -> LATIN SMALL LETTER R WITH CARON - u'\u0156' # 0xDF -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\u0157' # 0xE0 -> LATIN SMALL LETTER R WITH CEDILLA - u'\u0160' # 0xE1 -> LATIN CAPITAL LETTER S WITH CARON - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u0161' # 0xE4 -> LATIN SMALL LETTER S WITH CARON - u'\u015a' # 0xE5 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u015b' # 0xE6 -> LATIN SMALL LETTER S WITH ACUTE - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\u0164' # 0xE8 -> LATIN CAPITAL LETTER T WITH CARON - u'\u0165' # 0xE9 -> LATIN SMALL LETTER T WITH CARON - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\u017d' # 0xEB -> LATIN CAPITAL LETTER Z WITH CARON - u'\u017e' # 0xEC -> LATIN SMALL LETTER Z WITH CARON - u'\u016a' # 0xED -> LATIN CAPITAL LETTER U WITH MACRON - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u016b' # 0xF0 -> LATIN SMALL LETTER U WITH MACRON - u'\u016e' # 0xF1 -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u016f' # 0xF3 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\u0170' # 0xF4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\u0171' # 0xF5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\u0172' # 0xF6 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0173' # 0xF7 -> LATIN SMALL LETTER U WITH OGONEK - u'\xdd' # 0xF8 -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xfd' # 0xF9 -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0137' # 0xFA -> LATIN SMALL LETTER K WITH CEDILLA - u'\u017b' # 0xFB -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u0141' # 0xFC -> LATIN CAPITAL LETTER L WITH STROKE - u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0122' # 0xFE -> LATIN 
CAPITAL LETTER G WITH CEDILLA - u'\u02c7' # 0xFF -> CARON + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0100' # 0x81 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0101' # 0x82 -> LATIN SMALL LETTER A WITH MACRON + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0104' # 0x84 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\u0105' # 0x88 -> LATIN SMALL LETTER A WITH OGONEK + u'\u010c' # 0x89 -> LATIN CAPITAL LETTER C WITH CARON + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u010d' # 0x8B -> LATIN SMALL LETTER C WITH CARON + u'\u0106' # 0x8C -> LATIN CAPITAL LETTER C WITH ACUTE + u'\u0107' # 0x8D -> LATIN SMALL LETTER C WITH ACUTE + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u017a' # 0x90 -> LATIN SMALL LETTER Z WITH ACUTE + u'\u010e' # 0x91 -> LATIN CAPITAL LETTER D WITH CARON + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\u010f' # 0x93 -> 
LATIN SMALL LETTER D WITH CARON + u'\u0112' # 0x94 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0113' # 0x95 -> LATIN SMALL LETTER E WITH MACRON + u'\u0116' # 0x96 -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\u0117' # 0x98 -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\u011a' # 0x9D -> LATIN CAPITAL LETTER E WITH CARON + u'\u011b' # 0x9E -> LATIN SMALL LETTER E WITH CARON + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\u0118' # 0xA2 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\u0119' # 0xAB -> LATIN SMALL LETTER E WITH OGONEK + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u0123' # 0xAE -> LATIN SMALL LETTER G WITH CEDILLA + u'\u012e' # 0xAF -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u012f' # 0xB0 -> LATIN SMALL LETTER I WITH OGONEK + u'\u012a' # 0xB1 -> LATIN CAPITAL LETTER I WITH MACRON + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON + u'\u0136' # 0xB5 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u0142' # 0xB8 -> LATIN SMALL LETTER L WITH STROKE + u'\u013b' # 0xB9 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u013c' # 0xBA -> LATIN SMALL LETTER L WITH CEDILLA + u'\u013d' # 0xBB -> LATIN CAPITAL LETTER L WITH CARON + u'\u013e' # 0xBC -> LATIN SMALL LETTER L WITH 
CARON + u'\u0139' # 0xBD -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u013a' # 0xBE -> LATIN SMALL LETTER L WITH ACUTE + u'\u0145' # 0xBF -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u0146' # 0xC0 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u0143' # 0xC1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0144' # 0xC4 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0147' # 0xC5 -> LATIN CAPITAL LETTER N WITH CARON + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\u0148' # 0xCB -> LATIN SMALL LETTER N WITH CARON + u'\u0150' # 0xCC -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0151' # 0xCE -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\u014c' # 0xCF -> LATIN CAPITAL LETTER O WITH MACRON + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\u014d' # 0xD8 -> LATIN SMALL LETTER O WITH MACRON + u'\u0154' # 0xD9 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\u0155' # 0xDA -> LATIN SMALL LETTER R WITH ACUTE + u'\u0158' # 0xDB -> LATIN CAPITAL LETTER R WITH CARON + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0159' # 0xDE -> LATIN SMALL LETTER R WITH CARON + u'\u0156' # 0xDF -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\u0157' # 0xE0 -> LATIN SMALL LETTER R WITH CEDILLA + u'\u0160' # 0xE1 -> LATIN CAPITAL LETTER S WITH CARON + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u0161' # 0xE4 -> 
LATIN SMALL LETTER S WITH CARON + u'\u015a' # 0xE5 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u015b' # 0xE6 -> LATIN SMALL LETTER S WITH ACUTE + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\u0164' # 0xE8 -> LATIN CAPITAL LETTER T WITH CARON + u'\u0165' # 0xE9 -> LATIN SMALL LETTER T WITH CARON + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\u017d' # 0xEB -> LATIN CAPITAL LETTER Z WITH CARON + u'\u017e' # 0xEC -> LATIN SMALL LETTER Z WITH CARON + u'\u016a' # 0xED -> LATIN CAPITAL LETTER U WITH MACRON + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u016b' # 0xF0 -> LATIN SMALL LETTER U WITH MACRON + u'\u016e' # 0xF1 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u016f' # 0xF3 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\u0170' # 0xF4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\u0171' # 0xF5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\u0172' # 0xF6 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0173' # 0xF7 -> LATIN SMALL LETTER U WITH OGONEK + u'\xdd' # 0xF8 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xfd' # 0xF9 -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0137' # 0xFA -> LATIN SMALL LETTER K WITH CEDILLA + u'\u017b' # 0xFB -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u0141' # 0xFC -> LATIN CAPITAL LETTER L WITH STROKE + u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0122' # 0xFE -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 
0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 
0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 
0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xF8, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xF9, # LATIN SMALL LETTER Y WITH ACUTE - 0x0100: 0x81, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0x82, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0x84, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0x88, # LATIN SMALL LETTER A WITH OGONEK - 
0x0106: 0x8C, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0x8D, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0x89, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0x8B, # LATIN SMALL LETTER C WITH CARON - 0x010E: 0x91, # LATIN CAPITAL LETTER D WITH CARON - 0x010F: 0x93, # LATIN SMALL LETTER D WITH CARON - 0x0112: 0x94, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0x95, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0x96, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0x98, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xA2, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xAB, # LATIN SMALL LETTER E WITH OGONEK - 0x011A: 0x9D, # LATIN CAPITAL LETTER E WITH CARON - 0x011B: 0x9E, # LATIN SMALL LETTER E WITH CARON - 0x0122: 0xFE, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xAE, # LATIN SMALL LETTER G WITH CEDILLA - 0x012A: 0xB1, # LATIN CAPITAL LETTER I WITH MACRON - 0x012B: 0xB4, # LATIN SMALL LETTER I WITH MACRON - 0x012E: 0xAF, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012F: 0xB0, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xB5, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xFA, # LATIN SMALL LETTER K WITH CEDILLA - 0x0139: 0xBD, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013A: 0xBE, # LATIN SMALL LETTER L WITH ACUTE - 0x013B: 0xB9, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013C: 0xBA, # LATIN SMALL LETTER L WITH CEDILLA - 0x013D: 0xBB, # LATIN CAPITAL LETTER L WITH CARON - 0x013E: 0xBC, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0xFC, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xB8, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xC1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xC4, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0xBF, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xC0, # LATIN SMALL LETTER N WITH CEDILLA - 0x0147: 0xC5, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0xCB, # LATIN SMALL LETTER N WITH CARON - 0x014C: 0xCF, # LATIN CAPITAL LETTER O WITH MACRON - 0x014D: 0xD8, # LATIN SMALL LETTER O WITH MACRON - 0x0150: 0xCC, # LATIN 
CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xCE, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0xD9, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0xDA, # LATIN SMALL LETTER R WITH ACUTE - 0x0156: 0xDF, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xE0, # LATIN SMALL LETTER R WITH CEDILLA - 0x0158: 0xDB, # LATIN CAPITAL LETTER R WITH CARON - 0x0159: 0xDE, # LATIN SMALL LETTER R WITH CARON - 0x015A: 0xE5, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0xE6, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xE1, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xE4, # LATIN SMALL LETTER S WITH CARON - 0x0164: 0xE8, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0xE9, # LATIN SMALL LETTER T WITH CARON - 0x016A: 0xED, # LATIN CAPITAL LETTER U WITH MACRON - 0x016B: 0xF0, # LATIN SMALL LETTER U WITH MACRON - 0x016E: 0xF1, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x016F: 0xF3, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0xF4, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xF5, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0172: 0xF6, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xF7, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0x8F, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017A: 0x90, # LATIN SMALL LETTER Z WITH ACUTE - 0x017B: 0xFB, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0xEB, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xEC, # LATIN SMALL LETTER Z WITH CARON - 0x02C7: 0xFF, # CARON - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0xDD, # SINGLE RIGHT-POINTING 
ANGLE QUOTATION MARK - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xC6, # INCREMENT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE + 0x0000: 0x00, # CONTROL CHARACTER + 0x0001: 0x01, # CONTROL CHARACTER + 0x0002: 0x02, # CONTROL CHARACTER + 0x0003: 0x03, # CONTROL CHARACTER + 0x0004: 0x04, # CONTROL CHARACTER + 0x0005: 0x05, # CONTROL CHARACTER + 0x0006: 0x06, # CONTROL CHARACTER + 0x0007: 0x07, # CONTROL CHARACTER + 0x0008: 0x08, # CONTROL CHARACTER + 0x0009: 0x09, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER + 0x0010: 0x10, # CONTROL CHARACTER + 0x0011: 0x11, # CONTROL CHARACTER + 0x0012: 0x12, # CONTROL CHARACTER + 0x0013: 0x13, # CONTROL CHARACTER + 0x0014: 0x14, # CONTROL CHARACTER + 0x0015: 0x15, # CONTROL CHARACTER + 0x0016: 0x16, # CONTROL CHARACTER + 0x0017: 0x17, # CONTROL CHARACTER + 0x0018: 0x18, # CONTROL CHARACTER + 0x0019: 0x19, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # 
DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER 
G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xF8, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 
0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xF9, # LATIN SMALL LETTER Y WITH ACUTE + 0x0100: 0x81, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x82, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x84, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x88, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x8C, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x8D, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0x89, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0x8B, # LATIN SMALL LETTER C WITH CARON + 0x010E: 0x91, # LATIN CAPITAL LETTER D WITH CARON + 0x010F: 0x93, # LATIN SMALL LETTER D WITH CARON + 0x0112: 0x94, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x95, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x96, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x98, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0xA2, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xAB, # LATIN SMALL LETTER E WITH OGONEK + 0x011A: 0x9D, # LATIN CAPITAL LETTER E WITH CARON + 0x011B: 0x9E, # LATIN SMALL LETTER E WITH CARON + 0x0122: 0xFE, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0xAE, # LATIN SMALL LETTER G WITH CEDILLA + 0x012A: 0xB1, # LATIN CAPITAL LETTER I WITH MACRON + 0x012B: 0xB4, # LATIN SMALL LETTER I WITH MACRON + 0x012E: 0xAF, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012F: 0xB0, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0xB5, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0xFA, # LATIN SMALL LETTER K WITH CEDILLA + 0x0139: 0xBD, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013A: 0xBE, # LATIN SMALL LETTER L WITH 
ACUTE + 0x013B: 0xB9, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013C: 0xBA, # LATIN SMALL LETTER L WITH CEDILLA + 0x013D: 0xBB, # LATIN CAPITAL LETTER L WITH CARON + 0x013E: 0xBC, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0xFC, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xB8, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xC1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xC4, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0xBF, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0xC0, # LATIN SMALL LETTER N WITH CEDILLA + 0x0147: 0xC5, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0xCB, # LATIN SMALL LETTER N WITH CARON + 0x014C: 0xCF, # LATIN CAPITAL LETTER O WITH MACRON + 0x014D: 0xD8, # LATIN SMALL LETTER O WITH MACRON + 0x0150: 0xCC, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0xCE, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0xD9, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0xDA, # LATIN SMALL LETTER R WITH ACUTE + 0x0156: 0xDF, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0xE0, # LATIN SMALL LETTER R WITH CEDILLA + 0x0158: 0xDB, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0xDE, # LATIN SMALL LETTER R WITH CARON + 0x015A: 0xE5, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0xE6, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0xE1, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xE4, # LATIN SMALL LETTER S WITH CARON + 0x0164: 0xE8, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0xE9, # LATIN SMALL LETTER T WITH CARON + 0x016A: 0xED, # LATIN CAPITAL LETTER U WITH MACRON + 0x016B: 0xF0, # LATIN SMALL LETTER U WITH MACRON + 0x016E: 0xF1, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016F: 0xF3, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0xF4, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0xF5, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0172: 0xF6, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xF7, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0x8F, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0x90, # LATIN SMALL LETTER Z WITH 
ACUTE + 0x017B: 0xFB, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0xEB, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xEC, # LATIN SMALL LETTER Z WITH CARON + 0x02C7: 0xFF, # CARON + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xC6, # INCREMENT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x25CA: 0xD7, # LOZENGE } - Modified: python/branches/ssize_t/Lib/encodings/mac_croatian.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/mac_croatian.py (original) +++ python/branches/ssize_t/Lib/encodings/mac_croatian.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 
0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 
0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\u2206' # 0xB4 -> INCREMENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> 
LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\uf8ff' # 0xD8 -> Apple logo - u'\xa9' # 0xD9 -> COPYRIGHT SIGN - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\xc6' # 0xDE -> LATIN CAPITAL LETTER AE - u'\xbb' # 0xDF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2013' # 0xE0 -> EN DASH - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 
u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u03c0' # 0xF9 -> GREEK SMALL LETTER PI - u'\xcb' # 0xFA -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\xca' # 0xFD -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xe6' # 0xFE -> LATIN SMALL LETTER AE - u'\u02c7' # 0xFF -> CARON + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' 
# 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + 
u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> 
LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\u2206' # 0xB4 -> INCREMENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY 
PRODUCT + u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\uf8ff' # 0xD8 -> Apple logo + u'\xa9' # 0xD9 -> COPYRIGHT SIGN + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\xc6' # 0xDE -> LATIN CAPITAL LETTER AE + u'\xbb' # 0xDF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2013' # 0xE0 -> EN DASH + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> 
PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u03c0' # 0xF9 -> GREEK SMALL LETTER PI + u'\xcb' # 0xFA -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\xca' # 0xFD -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xe6' # 0xFE -> LATIN SMALL LETTER AE + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # 
CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 
0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 
0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xD9, # COPYRIGHT SIGN - 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00AF: 0xF8, # MACRON - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xAB, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00B7: 0xE1, # MIDDLE DOT - 0x00B8: 0xFC, # CEDILLA - 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xDF, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xC0, # INVERTED QUESTION MARK - 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xDE, # LATIN CAPITAL LETTER AE - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xFD, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xFA, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH 
STROKE - 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xFE, # LATIN SMALL LETTER AE - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH 
STROKE - 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON - 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02C7: 0xFF, # CARON - 0x02DA: 0xFB, # RING ABOVE - 0x02DC: 0xF7, # SMALL TILDE - 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA - 0x03C0: 0xF9, # GREEK SMALL LETTER PI - 0x2013: 0xE0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0xE4, # PER MILLE SIGN - 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x2044: 0xDA, # FRACTION SLASH - 0x20AC: 0xDB, # EURO SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xB4, # INCREMENT - 0x220F: 0xB8, # N-ARY PRODUCT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x222B: 0xBA, # INTEGRAL - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE - 0xF8FF: 0xD8, # Apple logo + 0x0000: 0x00, # CONTROL CHARACTER + 0x0001: 0x01, # CONTROL CHARACTER + 0x0002: 0x02, # CONTROL CHARACTER + 0x0003: 0x03, # CONTROL CHARACTER + 0x0004: 0x04, # CONTROL CHARACTER + 0x0005: 0x05, # CONTROL CHARACTER + 0x0006: 0x06, # CONTROL CHARACTER + 0x0007: 0x07, # CONTROL CHARACTER + 0x0008: 0x08, 
# CONTROL CHARACTER + 0x0009: 0x09, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER + 0x0010: 0x10, # CONTROL CHARACTER + 0x0011: 0x11, # CONTROL CHARACTER + 0x0012: 0x12, # CONTROL CHARACTER + 0x0013: 0x13, # CONTROL CHARACTER + 0x0014: 0x14, # CONTROL CHARACTER + 0x0015: 0x15, # CONTROL CHARACTER + 0x0016: 0x16, # CONTROL CHARACTER + 0x0017: 0x17, # CONTROL CHARACTER + 0x0018: 0x18, # CONTROL CHARACTER + 0x0019: 0x19, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 
0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER 
W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xD9, # COPYRIGHT SIGN + 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00AF: 0xF8, # MACRON + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B4: 0xAB, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00B7: 0xE1, # MIDDLE DOT + 0x00B8: 0xFC, # CEDILLA + 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xDF, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xC0, # INVERTED QUESTION MARK + 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xDE, # LATIN CAPITAL LETTER AE + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xFD, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xFA, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xF1, # LATIN 
CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xFE, # LATIN SMALL LETTER AE + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0106: 
0xC6, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON + 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON + 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02C7: 0xFF, # CARON + 0x02DA: 0xFB, # RING ABOVE + 0x02DC: 0xF7, # SMALL TILDE + 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA + 0x03C0: 0xF9, # GREEK SMALL LETTER PI + 0x2013: 0xE0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2030: 0xE4, # PER MILLE SIGN + 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2044: 0xDA, # FRACTION SLASH + 0x20AC: 0xDB, # EURO SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xB4, # INCREMENT + 0x220F: 0xB8, # N-ARY PRODUCT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x222B: 0xBA, # INTEGRAL + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x25CA: 0xD7, # LOZENGE + 0xF8FF: 0xD8, # Apple logo } - 
Modified: python/branches/ssize_t/Lib/encodings/mac_cyrillic.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/mac_cyrillic.py (original) +++ python/branches/ssize_t/Lib/encodings/mac_cyrillic.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' 
# 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - 
u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\u0410' # 0x80 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0x81 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0x82 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0x83 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0x84 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0x85 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0x86 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0x87 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0x88 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0x89 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0x8A -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0x8B -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0x8C -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0x8D -> CYRILLIC CAPITAL 
LETTER EN - u'\u041e' # 0x8E -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0x8F -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0x90 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0x91 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0x92 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0x93 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0x94 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0x95 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0x96 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0x97 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0x98 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0x99 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0x9A -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0x9B -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0x9C -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0x9D -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0x9E -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0x9F -> CYRILLIC CAPITAL LETTER YA - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\u0490' # 0xA2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\u0406' # 0xA7 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\u0402' # 0xAB -> CYRILLIC CAPITAL LETTER DJE - u'\u0452' # 0xAC -> CYRILLIC SMALL LETTER DJE - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u0403' # 0xAE -> CYRILLIC CAPITAL LETTER GJE - u'\u0453' # 0xAF -> CYRILLIC SMALL LETTER GJE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\u0456' # 0xB4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u0491' # 0xB6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN - u'\u0408' # 0xB7 -> CYRILLIC CAPITAL LETTER JE - u'\u0404' # 0xB8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - 
u'\u0454' # 0xB9 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0407' # 0xBA -> CYRILLIC CAPITAL LETTER YI - u'\u0457' # 0xBB -> CYRILLIC SMALL LETTER YI - u'\u0409' # 0xBC -> CYRILLIC CAPITAL LETTER LJE - u'\u0459' # 0xBD -> CYRILLIC SMALL LETTER LJE - u'\u040a' # 0xBE -> CYRILLIC CAPITAL LETTER NJE - u'\u045a' # 0xBF -> CYRILLIC SMALL LETTER NJE - u'\u0458' # 0xC0 -> CYRILLIC SMALL LETTER JE - u'\u0405' # 0xC1 -> CYRILLIC CAPITAL LETTER DZE - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\u040b' # 0xCB -> CYRILLIC CAPITAL LETTER TSHE - u'\u045b' # 0xCC -> CYRILLIC SMALL LETTER TSHE - u'\u040c' # 0xCD -> CYRILLIC CAPITAL LETTER KJE - u'\u045c' # 0xCE -> CYRILLIC SMALL LETTER KJE - u'\u0455' # 0xCF -> CYRILLIC SMALL LETTER DZE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u201e' # 0xD7 -> DOUBLE LOW-9 QUOTATION MARK - u'\u040e' # 0xD8 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045e' # 0xD9 -> CYRILLIC SMALL LETTER SHORT U - u'\u040f' # 0xDA -> CYRILLIC CAPITAL LETTER DZHE - u'\u045f' # 0xDB -> CYRILLIC SMALL LETTER DZHE - u'\u2116' # 0xDC -> NUMERO SIGN - u'\u0401' # 0xDD -> CYRILLIC CAPITAL LETTER IO - u'\u0451' # 0xDE -> CYRILLIC SMALL LETTER IO - u'\u044f' # 0xDF -> CYRILLIC SMALL LETTER YA - u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE - 
u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE - u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU - u'\u20ac' # 0xFF -> EURO SIGN + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL 
CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\u0410' # 0x80 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x81 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x82 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x83 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x84 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x85 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x86 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x87 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x88 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x89 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x8A -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x8B -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x8C -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x8D -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x8E -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x8F -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x90 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x91 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x92 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x93 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x94 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x95 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x96 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x97 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x98 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x99 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x9A -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x9B -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0x9C -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x9D -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x9E -> CYRILLIC CAPITAL LETTER YU + 
u'\u042f' # 0x9F -> CYRILLIC CAPITAL LETTER YA + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\u0490' # 0xA2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\u0406' # 0xA7 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\u0402' # 0xAB -> CYRILLIC CAPITAL LETTER DJE + u'\u0452' # 0xAC -> CYRILLIC SMALL LETTER DJE + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u0403' # 0xAE -> CYRILLIC CAPITAL LETTER GJE + u'\u0453' # 0xAF -> CYRILLIC SMALL LETTER GJE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\u0456' # 0xB4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u0491' # 0xB6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN + u'\u0408' # 0xB7 -> CYRILLIC CAPITAL LETTER JE + u'\u0404' # 0xB8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0454' # 0xB9 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0407' # 0xBA -> CYRILLIC CAPITAL LETTER YI + u'\u0457' # 0xBB -> CYRILLIC SMALL LETTER YI + u'\u0409' # 0xBC -> CYRILLIC CAPITAL LETTER LJE + u'\u0459' # 0xBD -> CYRILLIC SMALL LETTER LJE + u'\u040a' # 0xBE -> CYRILLIC CAPITAL LETTER NJE + u'\u045a' # 0xBF -> CYRILLIC SMALL LETTER NJE + u'\u0458' # 0xC0 -> CYRILLIC SMALL LETTER JE + u'\u0405' # 0xC1 -> CYRILLIC CAPITAL LETTER DZE + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\u040b' # 0xCB -> CYRILLIC 
CAPITAL LETTER TSHE + u'\u045b' # 0xCC -> CYRILLIC SMALL LETTER TSHE + u'\u040c' # 0xCD -> CYRILLIC CAPITAL LETTER KJE + u'\u045c' # 0xCE -> CYRILLIC SMALL LETTER KJE + u'\u0455' # 0xCF -> CYRILLIC SMALL LETTER DZE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u201e' # 0xD7 -> DOUBLE LOW-9 QUOTATION MARK + u'\u040e' # 0xD8 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0xD9 -> CYRILLIC SMALL LETTER SHORT U + u'\u040f' # 0xDA -> CYRILLIC CAPITAL LETTER DZHE + u'\u045f' # 0xDB -> CYRILLIC SMALL LETTER DZHE + u'\u2116' # 0xDC -> NUMERO SIGN + u'\u0401' # 0xDD -> CYRILLIC CAPITAL LETTER IO + u'\u0451' # 0xDE -> CYRILLIC SMALL LETTER IO + u'\u044f' # 0xDF -> CYRILLIC SMALL LETTER YA + u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0xF6 -> 
CYRILLIC SMALL LETTER TSE + u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E + u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU + u'\u20ac' # 0xFF -> EURO SIGN ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # 
PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN 
SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00F7: 0xD6, # DIVISION SIGN - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x0401: 0xDD, # CYRILLIC CAPITAL LETTER IO - 0x0402: 0xAB, # CYRILLIC CAPITAL LETTER DJE - 0x0403: 0xAE, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0xB8, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0405: 0xC1, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0xA7, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xBA, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0xB7, # CYRILLIC CAPITAL LETTER JE - 0x0409: 
0xBC, # CYRILLIC CAPITAL LETTER LJE - 0x040A: 0xBE, # CYRILLIC CAPITAL LETTER NJE - 0x040B: 0xCB, # CYRILLIC CAPITAL LETTER TSHE - 0x040C: 0xCD, # CYRILLIC CAPITAL LETTER KJE - 0x040E: 0xD8, # CYRILLIC CAPITAL LETTER SHORT U - 0x040F: 0xDA, # CYRILLIC CAPITAL LETTER DZHE - 0x0410: 0x80, # CYRILLIC CAPITAL LETTER A - 0x0411: 0x81, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0x82, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0x83, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0x84, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0x85, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0x86, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0x87, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0x88, # CYRILLIC CAPITAL LETTER I - 0x0419: 0x89, # CYRILLIC CAPITAL LETTER SHORT I - 0x041A: 0x8A, # CYRILLIC CAPITAL LETTER KA - 0x041B: 0x8B, # CYRILLIC CAPITAL LETTER EL - 0x041C: 0x8C, # CYRILLIC CAPITAL LETTER EM - 0x041D: 0x8D, # CYRILLIC CAPITAL LETTER EN - 0x041E: 0x8E, # CYRILLIC CAPITAL LETTER O - 0x041F: 0x8F, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0x90, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0x91, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0x92, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0x93, # CYRILLIC CAPITAL LETTER U - 0x0424: 0x94, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0x95, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0x96, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0x97, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0x98, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0x99, # CYRILLIC CAPITAL LETTER SHCHA - 0x042A: 0x9A, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042B: 0x9B, # CYRILLIC CAPITAL LETTER YERU - 0x042C: 0x9C, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042D: 0x9D, # CYRILLIC CAPITAL LETTER E - 0x042E: 0x9E, # CYRILLIC CAPITAL LETTER YU - 0x042F: 0x9F, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xE0, # CYRILLIC SMALL LETTER A - 0x0431: 0xE1, # CYRILLIC SMALL LETTER BE - 0x0432: 0xE2, # CYRILLIC SMALL LETTER VE - 0x0433: 0xE3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xE4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xE5, # CYRILLIC SMALL LETTER IE - 
0x0436: 0xE6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xE7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xE8, # CYRILLIC SMALL LETTER I - 0x0439: 0xE9, # CYRILLIC SMALL LETTER SHORT I - 0x043A: 0xEA, # CYRILLIC SMALL LETTER KA - 0x043B: 0xEB, # CYRILLIC SMALL LETTER EL - 0x043C: 0xEC, # CYRILLIC SMALL LETTER EM - 0x043D: 0xED, # CYRILLIC SMALL LETTER EN - 0x043E: 0xEE, # CYRILLIC SMALL LETTER O - 0x043F: 0xEF, # CYRILLIC SMALL LETTER PE - 0x0440: 0xF0, # CYRILLIC SMALL LETTER ER - 0x0441: 0xF1, # CYRILLIC SMALL LETTER ES - 0x0442: 0xF2, # CYRILLIC SMALL LETTER TE - 0x0443: 0xF3, # CYRILLIC SMALL LETTER U - 0x0444: 0xF4, # CYRILLIC SMALL LETTER EF - 0x0445: 0xF5, # CYRILLIC SMALL LETTER HA - 0x0446: 0xF6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xF7, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xF8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xF9, # CYRILLIC SMALL LETTER SHCHA - 0x044A: 0xFA, # CYRILLIC SMALL LETTER HARD SIGN - 0x044B: 0xFB, # CYRILLIC SMALL LETTER YERU - 0x044C: 0xFC, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044D: 0xFD, # CYRILLIC SMALL LETTER E - 0x044E: 0xFE, # CYRILLIC SMALL LETTER YU - 0x044F: 0xDF, # CYRILLIC SMALL LETTER YA - 0x0451: 0xDE, # CYRILLIC SMALL LETTER IO - 0x0452: 0xAC, # CYRILLIC SMALL LETTER DJE - 0x0453: 0xAF, # CYRILLIC SMALL LETTER GJE - 0x0454: 0xB9, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0xCF, # CYRILLIC SMALL LETTER DZE - 0x0456: 0xB4, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xBB, # CYRILLIC SMALL LETTER YI - 0x0458: 0xC0, # CYRILLIC SMALL LETTER JE - 0x0459: 0xBD, # CYRILLIC SMALL LETTER LJE - 0x045A: 0xBF, # CYRILLIC SMALL LETTER NJE - 0x045B: 0xCC, # CYRILLIC SMALL LETTER TSHE - 0x045C: 0xCE, # CYRILLIC SMALL LETTER KJE - 0x045E: 0xD9, # CYRILLIC SMALL LETTER SHORT U - 0x045F: 0xDB, # CYRILLIC SMALL LETTER DZHE - 0x0490: 0xA2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN - 0x0491: 0xB6, # CYRILLIC SMALL LETTER GHE WITH UPTURN - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK 
- 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xD7, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x20AC: 0xFF, # EURO SIGN - 0x2116: 0xDC, # NUMERO SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2206: 0xC6, # INCREMENT - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x0000: 0x00, # CONTROL CHARACTER + 0x0001: 0x01, # CONTROL CHARACTER + 0x0002: 0x02, # CONTROL CHARACTER + 0x0003: 0x03, # CONTROL CHARACTER + 0x0004: 0x04, # CONTROL CHARACTER + 0x0005: 0x05, # CONTROL CHARACTER + 0x0006: 0x06, # CONTROL CHARACTER + 0x0007: 0x07, # CONTROL CHARACTER + 0x0008: 0x08, # CONTROL CHARACTER + 0x0009: 0x09, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER + 0x0010: 0x10, # CONTROL CHARACTER + 0x0011: 0x11, # CONTROL CHARACTER + 0x0012: 0x12, # CONTROL CHARACTER + 0x0013: 0x13, # CONTROL CHARACTER + 0x0014: 0x14, # CONTROL CHARACTER + 0x0015: 0x15, # CONTROL CHARACTER + 0x0016: 0x16, # CONTROL CHARACTER + 0x0017: 0x17, # CONTROL CHARACTER + 0x0018: 0x18, # CONTROL CHARACTER + 0x0019: 0x19, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 
0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE 
ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00F7: 0xD6, # DIVISION SIGN + 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK + 0x0401: 0xDD, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0xAB, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0xAE, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0xB8, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0xC1, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0xA7, # CYRILLIC CAPITAL LETTER 
BYELORUSSIAN-UKRAINIAN I + 0x0407: 0xBA, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0xB7, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0xBC, # CYRILLIC CAPITAL LETTER LJE + 0x040A: 0xBE, # CYRILLIC CAPITAL LETTER NJE + 0x040B: 0xCB, # CYRILLIC CAPITAL LETTER TSHE + 0x040C: 0xCD, # CYRILLIC CAPITAL LETTER KJE + 0x040E: 0xD8, # CYRILLIC CAPITAL LETTER SHORT U + 0x040F: 0xDA, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0x80, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x81, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x82, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x83, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x84, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x85, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x86, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x87, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x88, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x89, # CYRILLIC CAPITAL LETTER SHORT I + 0x041A: 0x8A, # CYRILLIC CAPITAL LETTER KA + 0x041B: 0x8B, # CYRILLIC CAPITAL LETTER EL + 0x041C: 0x8C, # CYRILLIC CAPITAL LETTER EM + 0x041D: 0x8D, # CYRILLIC CAPITAL LETTER EN + 0x041E: 0x8E, # CYRILLIC CAPITAL LETTER O + 0x041F: 0x8F, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x90, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x91, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x92, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x93, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x94, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x95, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x96, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x97, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x98, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x99, # CYRILLIC CAPITAL LETTER SHCHA + 0x042A: 0x9A, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042B: 0x9B, # CYRILLIC CAPITAL LETTER YERU + 0x042C: 0x9C, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042D: 0x9D, # CYRILLIC CAPITAL LETTER E + 0x042E: 0x9E, # CYRILLIC CAPITAL LETTER YU + 0x042F: 0x9F, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0xE0, # CYRILLIC SMALL LETTER A + 0x0431: 0xE1, # CYRILLIC SMALL LETTER BE + 0x0432: 0xE2, # CYRILLIC SMALL LETTER VE + 0x0433: 
0xE3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0xE4, # CYRILLIC SMALL LETTER DE + 0x0435: 0xE5, # CYRILLIC SMALL LETTER IE + 0x0436: 0xE6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0xE7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0xE8, # CYRILLIC SMALL LETTER I + 0x0439: 0xE9, # CYRILLIC SMALL LETTER SHORT I + 0x043A: 0xEA, # CYRILLIC SMALL LETTER KA + 0x043B: 0xEB, # CYRILLIC SMALL LETTER EL + 0x043C: 0xEC, # CYRILLIC SMALL LETTER EM + 0x043D: 0xED, # CYRILLIC SMALL LETTER EN + 0x043E: 0xEE, # CYRILLIC SMALL LETTER O + 0x043F: 0xEF, # CYRILLIC SMALL LETTER PE + 0x0440: 0xF0, # CYRILLIC SMALL LETTER ER + 0x0441: 0xF1, # CYRILLIC SMALL LETTER ES + 0x0442: 0xF2, # CYRILLIC SMALL LETTER TE + 0x0443: 0xF3, # CYRILLIC SMALL LETTER U + 0x0444: 0xF4, # CYRILLIC SMALL LETTER EF + 0x0445: 0xF5, # CYRILLIC SMALL LETTER HA + 0x0446: 0xF6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0xF7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0xF8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0xF9, # CYRILLIC SMALL LETTER SHCHA + 0x044A: 0xFA, # CYRILLIC SMALL LETTER HARD SIGN + 0x044B: 0xFB, # CYRILLIC SMALL LETTER YERU + 0x044C: 0xFC, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044D: 0xFD, # CYRILLIC SMALL LETTER E + 0x044E: 0xFE, # CYRILLIC SMALL LETTER YU + 0x044F: 0xDF, # CYRILLIC SMALL LETTER YA + 0x0451: 0xDE, # CYRILLIC SMALL LETTER IO + 0x0452: 0xAC, # CYRILLIC SMALL LETTER DJE + 0x0453: 0xAF, # CYRILLIC SMALL LETTER GJE + 0x0454: 0xB9, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0xCF, # CYRILLIC SMALL LETTER DZE + 0x0456: 0xB4, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0xBB, # CYRILLIC SMALL LETTER YI + 0x0458: 0xC0, # CYRILLIC SMALL LETTER JE + 0x0459: 0xBD, # CYRILLIC SMALL LETTER LJE + 0x045A: 0xBF, # CYRILLIC SMALL LETTER NJE + 0x045B: 0xCC, # CYRILLIC SMALL LETTER TSHE + 0x045C: 0xCE, # CYRILLIC SMALL LETTER KJE + 0x045E: 0xD9, # CYRILLIC SMALL LETTER SHORT U + 0x045F: 0xDB, # CYRILLIC SMALL LETTER DZHE + 0x0490: 0xA2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x0491: 0xB6, # CYRILLIC 
SMALL LETTER GHE WITH UPTURN + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xD7, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x20AC: 0xFF, # EURO SIGN + 0x2116: 0xDC, # NUMERO SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2206: 0xC6, # INCREMENT + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO } - Modified: python/branches/ssize_t/Lib/encodings/mac_farsi.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/mac_farsi.py (original) +++ python/branches/ssize_t/Lib/encodings/mac_farsi.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - 
u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE, left-right - u'!' # 0x21 -> EXCLAMATION MARK, left-right - u'"' # 0x22 -> QUOTATION MARK, left-right - u'#' # 0x23 -> NUMBER SIGN, left-right - u'$' # 0x24 -> DOLLAR SIGN, left-right - u'%' # 0x25 -> PERCENT SIGN, left-right - u'&' # 0x26 -> AMPERSAND, left-right - u"'" # 0x27 -> APOSTROPHE, left-right - u'(' # 0x28 -> LEFT PARENTHESIS, left-right - u')' # 0x29 -> RIGHT PARENTHESIS, left-right - u'*' # 0x2A -> ASTERISK, left-right - u'+' # 0x2B -> PLUS SIGN, left-right - u',' # 0x2C -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - u'-' # 0x2D -> HYPHEN-MINUS, left-right - u'.' 
# 0x2E -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - u'/' # 0x2F -> SOLIDUS, left-right - u'0' # 0x30 -> DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE - u'2' # 0x32 -> DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO - u'3' # 0x33 -> DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE - u':' # 0x3A -> COLON, left-right - u';' # 0x3B -> SEMICOLON, left-right - u'<' # 0x3C -> LESS-THAN SIGN, left-right - u'=' # 0x3D -> EQUALS SIGN, left-right - u'>' # 0x3E -> GREATER-THAN SIGN, left-right - u'?' 
# 0x3F -> QUESTION MARK, left-right - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET, left-right - u'\\' # 0x5C -> REVERSE SOLIDUS, left-right - u']' # 0x5D -> RIGHT SQUARE BRACKET, left-right - u'^' # 0x5E -> CIRCUMFLEX ACCENT, left-right - u'_' # 0x5F -> LOW LINE, left-right - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN 
SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET, left-right - u'|' # 0x7C -> VERTICAL LINE, left-right - u'}' # 0x7D -> RIGHT CURLY BRACKET, left-right - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xa0' # 0x81 -> NO-BREAK SPACE, right-left - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u06ba' # 0x8B -> ARABIC LETTER NOON GHUNNA - u'\xab' # 0x8C -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\u2026' # 0x93 -> HORIZONTAL ELLIPSIS, right-left - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xbb' # 0x98 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH 
CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0x9B -> DIVISION SIGN, right-left - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u' ' # 0xA0 -> SPACE, right-left - u'!' # 0xA1 -> EXCLAMATION MARK, right-left - u'"' # 0xA2 -> QUOTATION MARK, right-left - u'#' # 0xA3 -> NUMBER SIGN, right-left - u'$' # 0xA4 -> DOLLAR SIGN, right-left - u'\u066a' # 0xA5 -> ARABIC PERCENT SIGN - u'&' # 0xA6 -> AMPERSAND, right-left - u"'" # 0xA7 -> APOSTROPHE, right-left - u'(' # 0xA8 -> LEFT PARENTHESIS, right-left - u')' # 0xA9 -> RIGHT PARENTHESIS, right-left - u'*' # 0xAA -> ASTERISK, right-left - u'+' # 0xAB -> PLUS SIGN, right-left - u'\u060c' # 0xAC -> ARABIC COMMA - u'-' # 0xAD -> HYPHEN-MINUS, right-left - u'.' # 0xAE -> FULL STOP, right-left - u'/' # 0xAF -> SOLIDUS, right-left - u'\u06f0' # 0xB0 -> EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) - u'\u06f1' # 0xB1 -> EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) - u'\u06f2' # 0xB2 -> EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) - u'\u06f3' # 0xB3 -> EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) - u'\u06f4' # 0xB4 -> EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) - u'\u06f5' # 0xB5 -> EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) - u'\u06f6' # 0xB6 -> EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) - u'\u06f7' # 0xB7 -> EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) - u'\u06f8' # 0xB8 -> EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) - u'\u06f9' # 0xB9 -> EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) - u':' # 0xBA -> COLON, right-left - u'\u061b' # 0xBB -> ARABIC SEMICOLON - u'<' # 0xBC -> LESS-THAN SIGN, right-left - u'=' # 0xBD -> EQUALS SIGN, right-left - u'>' # 0xBE -> GREATER-THAN 
SIGN, right-left - u'\u061f' # 0xBF -> ARABIC QUESTION MARK - u'\u274a' # 0xC0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left - u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xC7 -> ARABIC LETTER ALEF - u'\u0628' # 0xC8 -> ARABIC LETTER BEH - u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xCA -> ARABIC LETTER TEH - u'\u062b' # 0xCB -> ARABIC LETTER THEH - u'\u062c' # 0xCC -> ARABIC LETTER JEEM - u'\u062d' # 0xCD -> ARABIC LETTER HAH - u'\u062e' # 0xCE -> ARABIC LETTER KHAH - u'\u062f' # 0xCF -> ARABIC LETTER DAL - u'\u0630' # 0xD0 -> ARABIC LETTER THAL - u'\u0631' # 0xD1 -> ARABIC LETTER REH - u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xD3 -> ARABIC LETTER SEEN - u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xD5 -> ARABIC LETTER SAD - u'\u0636' # 0xD6 -> ARABIC LETTER DAD - u'\u0637' # 0xD7 -> ARABIC LETTER TAH - u'\u0638' # 0xD8 -> ARABIC LETTER ZAH - u'\u0639' # 0xD9 -> ARABIC LETTER AIN - u'\u063a' # 0xDA -> ARABIC LETTER GHAIN - u'[' # 0xDB -> LEFT SQUARE BRACKET, right-left - u'\\' # 0xDC -> REVERSE SOLIDUS, right-left - u']' # 0xDD -> RIGHT SQUARE BRACKET, right-left - u'^' # 0xDE -> CIRCUMFLEX ACCENT, right-left - u'_' # 0xDF -> LOW LINE, right-left - u'\u0640' # 0xE0 -> ARABIC TATWEEL - u'\u0641' # 0xE1 -> ARABIC LETTER FEH - u'\u0642' # 0xE2 -> ARABIC LETTER QAF - u'\u0643' # 0xE3 -> ARABIC LETTER KAF - u'\u0644' # 0xE4 -> ARABIC LETTER LAM - u'\u0645' # 0xE5 -> ARABIC LETTER MEEM - u'\u0646' # 0xE6 -> ARABIC LETTER NOON - u'\u0647' # 0xE7 -> ARABIC LETTER HEH - u'\u0648' # 0xE8 -> ARABIC LETTER WAW - u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xEA -> ARABIC LETTER YEH - u'\u064b' # 0xEB -> ARABIC FATHATAN 
- u'\u064c' # 0xEC -> ARABIC DAMMATAN - u'\u064d' # 0xED -> ARABIC KASRATAN - u'\u064e' # 0xEE -> ARABIC FATHA - u'\u064f' # 0xEF -> ARABIC DAMMA - u'\u0650' # 0xF0 -> ARABIC KASRA - u'\u0651' # 0xF1 -> ARABIC SHADDA - u'\u0652' # 0xF2 -> ARABIC SUKUN - u'\u067e' # 0xF3 -> ARABIC LETTER PEH - u'\u0679' # 0xF4 -> ARABIC LETTER TTEH - u'\u0686' # 0xF5 -> ARABIC LETTER TCHEH - u'\u06d5' # 0xF6 -> ARABIC LETTER AE - u'\u06a4' # 0xF7 -> ARABIC LETTER VEH - u'\u06af' # 0xF8 -> ARABIC LETTER GAF - u'\u0688' # 0xF9 -> ARABIC LETTER DDAL - u'\u0691' # 0xFA -> ARABIC LETTER RREH - u'{' # 0xFB -> LEFT CURLY BRACKET, right-left - u'|' # 0xFC -> VERTICAL LINE, right-left - u'}' # 0xFD -> RIGHT CURLY BRACKET, right-left - u'\u0698' # 0xFE -> ARABIC LETTER JEH - u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> 
CONTROL CHARACTER + u' ' # 0x20 -> SPACE, left-right + u'!' # 0x21 -> EXCLAMATION MARK, left-right + u'"' # 0x22 -> QUOTATION MARK, left-right + u'#' # 0x23 -> NUMBER SIGN, left-right + u'$' # 0x24 -> DOLLAR SIGN, left-right + u'%' # 0x25 -> PERCENT SIGN, left-right + u'&' # 0x26 -> AMPERSAND, left-right + u"'" # 0x27 -> APOSTROPHE, left-right + u'(' # 0x28 -> LEFT PARENTHESIS, left-right + u')' # 0x29 -> RIGHT PARENTHESIS, left-right + u'*' # 0x2A -> ASTERISK, left-right + u'+' # 0x2B -> PLUS SIGN, left-right + u',' # 0x2C -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR + u'-' # 0x2D -> HYPHEN-MINUS, left-right + u'.' # 0x2E -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR + u'/' # 0x2F -> SOLIDUS, left-right + u'0' # 0x30 -> DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE + u'2' # 0x32 -> DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO + u'3' # 0x33 -> DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE + u':' # 0x3A -> COLON, left-right + u';' # 0x3B -> SEMICOLON, left-right + u'<' # 0x3C -> LESS-THAN 
SIGN, left-right + u'=' # 0x3D -> EQUALS SIGN, left-right + u'>' # 0x3E -> GREATER-THAN SIGN, left-right + u'?' # 0x3F -> QUESTION MARK, left-right + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET, left-right + u'\\' # 0x5C -> REVERSE SOLIDUS, left-right + u']' # 0x5D -> RIGHT SQUARE BRACKET, left-right + u'^' # 0x5E -> CIRCUMFLEX ACCENT, left-right + u'_' # 0x5F -> LOW LINE, left-right + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN 
SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET, left-right + u'|' # 0x7C -> VERTICAL LINE, left-right + u'}' # 0x7D -> RIGHT CURLY BRACKET, left-right + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xa0' # 0x81 -> NO-BREAK SPACE, right-left + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u06ba' # 0x8B -> ARABIC LETTER NOON GHUNNA + u'\xab' # 0x8C -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\u2026' # 0x93 -> HORIZONTAL ELLIPSIS, right-left + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xbb' 
# 0x98 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x9B -> DIVISION SIGN, right-left + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u' ' # 0xA0 -> SPACE, right-left + u'!' # 0xA1 -> EXCLAMATION MARK, right-left + u'"' # 0xA2 -> QUOTATION MARK, right-left + u'#' # 0xA3 -> NUMBER SIGN, right-left + u'$' # 0xA4 -> DOLLAR SIGN, right-left + u'\u066a' # 0xA5 -> ARABIC PERCENT SIGN + u'&' # 0xA6 -> AMPERSAND, right-left + u"'" # 0xA7 -> APOSTROPHE, right-left + u'(' # 0xA8 -> LEFT PARENTHESIS, right-left + u')' # 0xA9 -> RIGHT PARENTHESIS, right-left + u'*' # 0xAA -> ASTERISK, right-left + u'+' # 0xAB -> PLUS SIGN, right-left + u'\u060c' # 0xAC -> ARABIC COMMA + u'-' # 0xAD -> HYPHEN-MINUS, right-left + u'.' 
# 0xAE -> FULL STOP, right-left + u'/' # 0xAF -> SOLIDUS, right-left + u'\u06f0' # 0xB0 -> EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) + u'\u06f1' # 0xB1 -> EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) + u'\u06f2' # 0xB2 -> EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) + u'\u06f3' # 0xB3 -> EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) + u'\u06f4' # 0xB4 -> EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) + u'\u06f5' # 0xB5 -> EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) + u'\u06f6' # 0xB6 -> EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) + u'\u06f7' # 0xB7 -> EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) + u'\u06f8' # 0xB8 -> EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) + u'\u06f9' # 0xB9 -> EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) + u':' # 0xBA -> COLON, right-left + u'\u061b' # 0xBB -> ARABIC SEMICOLON + u'<' # 0xBC -> LESS-THAN SIGN, right-left + u'=' # 0xBD -> EQUALS SIGN, right-left + u'>' # 0xBE -> GREATER-THAN SIGN, right-left + u'\u061f' # 0xBF -> ARABIC QUESTION MARK + u'\u274a' # 0xC0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left + u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA + u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0xC7 -> ARABIC LETTER ALEF + u'\u0628' # 0xC8 -> ARABIC LETTER BEH + u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0xCA -> ARABIC LETTER TEH + u'\u062b' # 0xCB -> ARABIC LETTER THEH + u'\u062c' # 0xCC -> ARABIC LETTER JEEM + u'\u062d' # 0xCD -> ARABIC LETTER HAH + u'\u062e' # 0xCE -> ARABIC LETTER KHAH + u'\u062f' # 0xCF -> ARABIC LETTER DAL + u'\u0630' # 0xD0 -> ARABIC LETTER THAL + u'\u0631' # 0xD1 -> ARABIC 
LETTER REH + u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN + u'\u0633' # 0xD3 -> ARABIC LETTER SEEN + u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN + u'\u0635' # 0xD5 -> ARABIC LETTER SAD + u'\u0636' # 0xD6 -> ARABIC LETTER DAD + u'\u0637' # 0xD7 -> ARABIC LETTER TAH + u'\u0638' # 0xD8 -> ARABIC LETTER ZAH + u'\u0639' # 0xD9 -> ARABIC LETTER AIN + u'\u063a' # 0xDA -> ARABIC LETTER GHAIN + u'[' # 0xDB -> LEFT SQUARE BRACKET, right-left + u'\\' # 0xDC -> REVERSE SOLIDUS, right-left + u']' # 0xDD -> RIGHT SQUARE BRACKET, right-left + u'^' # 0xDE -> CIRCUMFLEX ACCENT, right-left + u'_' # 0xDF -> LOW LINE, right-left + u'\u0640' # 0xE0 -> ARABIC TATWEEL + u'\u0641' # 0xE1 -> ARABIC LETTER FEH + u'\u0642' # 0xE2 -> ARABIC LETTER QAF + u'\u0643' # 0xE3 -> ARABIC LETTER KAF + u'\u0644' # 0xE4 -> ARABIC LETTER LAM + u'\u0645' # 0xE5 -> ARABIC LETTER MEEM + u'\u0646' # 0xE6 -> ARABIC LETTER NOON + u'\u0647' # 0xE7 -> ARABIC LETTER HEH + u'\u0648' # 0xE8 -> ARABIC LETTER WAW + u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0xEA -> ARABIC LETTER YEH + u'\u064b' # 0xEB -> ARABIC FATHATAN + u'\u064c' # 0xEC -> ARABIC DAMMATAN + u'\u064d' # 0xED -> ARABIC KASRATAN + u'\u064e' # 0xEE -> ARABIC FATHA + u'\u064f' # 0xEF -> ARABIC DAMMA + u'\u0650' # 0xF0 -> ARABIC KASRA + u'\u0651' # 0xF1 -> ARABIC SHADDA + u'\u0652' # 0xF2 -> ARABIC SUKUN + u'\u067e' # 0xF3 -> ARABIC LETTER PEH + u'\u0679' # 0xF4 -> ARABIC LETTER TTEH + u'\u0686' # 0xF5 -> ARABIC LETTER TCHEH + u'\u06d5' # 0xF6 -> ARABIC LETTER AE + u'\u06a4' # 0xF7 -> ARABIC LETTER VEH + u'\u06af' # 0xF8 -> ARABIC LETTER GAF + u'\u0688' # 0xF9 -> ARABIC LETTER DDAL + u'\u0691' # 0xFA -> ARABIC LETTER RREH + u'{' # 0xFB -> LEFT CURLY BRACKET, right-left + u'|' # 0xFC -> VERTICAL LINE, right-left + u'}' # 0xFD -> RIGHT CURLY BRACKET, right-left + u'\u0698' # 0xFE -> ARABIC LETTER JEH + u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL 
CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE, left-right - 0x0020: 0xA0, # SPACE, right-left - 0x0021: 0x21, # EXCLAMATION MARK, left-right - 0x0021: 0xA1, # EXCLAMATION MARK, right-left - 0x0022: 0x22, # QUOTATION MARK, left-right - 0x0022: 0xA2, # QUOTATION MARK, right-left - 0x0023: 0x23, # NUMBER SIGN, left-right - 0x0023: 0xA3, # NUMBER SIGN, right-left - 0x0024: 0x24, # DOLLAR SIGN, left-right - 0x0024: 0xA4, # DOLLAR SIGN, right-left - 0x0025: 0x25, # PERCENT SIGN, left-right - 0x0026: 0x26, # AMPERSAND, left-right - 0x0026: 0xA6, # AMPERSAND, right-left - 0x0027: 0x27, # APOSTROPHE, left-right - 0x0027: 0xA7, # APOSTROPHE, right-left - 0x0028: 0x28, # LEFT PARENTHESIS, left-right - 0x0028: 0xA8, # LEFT PARENTHESIS, right-left - 0x0029: 0x29, # RIGHT PARENTHESIS, left-right - 0x0029: 0xA9, # RIGHT PARENTHESIS, right-left - 0x002A: 0x2A, # ASTERISK, left-right - 0x002A: 0xAA, # ASTERISK, right-left - 0x002B: 
0x2B, # PLUS SIGN, left-right - 0x002B: 0xAB, # PLUS SIGN, right-left - 0x002C: 0x2C, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - 0x002D: 0x2D, # HYPHEN-MINUS, left-right - 0x002D: 0xAD, # HYPHEN-MINUS, right-left - 0x002E: 0x2E, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - 0x002E: 0xAE, # FULL STOP, right-left - 0x002F: 0x2F, # SOLIDUS, left-right - 0x002F: 0xAF, # SOLIDUS, right-left - 0x0030: 0x30, # DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE - 0x0032: 0x32, # DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO - 0x0033: 0x33, # DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE - 0x003A: 0x3A, # COLON, left-right - 0x003A: 0xBA, # COLON, right-left - 0x003B: 0x3B, # SEMICOLON, left-right - 0x003C: 0x3C, # LESS-THAN SIGN, left-right - 0x003C: 0xBC, # LESS-THAN SIGN, right-left - 0x003D: 0x3D, # EQUALS SIGN, left-right - 0x003D: 0xBD, # EQUALS SIGN, right-left - 0x003E: 0x3E, # GREATER-THAN SIGN, left-right - 0x003E: 0xBE, # GREATER-THAN SIGN, right-left - 0x003F: 0x3F, # QUESTION MARK, 
left-right - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET, left-right - 0x005B: 0xDB, # LEFT SQUARE BRACKET, right-left - 0x005C: 0x5C, # REVERSE SOLIDUS, left-right - 0x005C: 0xDC, # REVERSE SOLIDUS, right-left - 0x005D: 0x5D, # RIGHT SQUARE BRACKET, left-right - 0x005D: 0xDD, # RIGHT SQUARE BRACKET, right-left - 0x005E: 0x5E, # CIRCUMFLEX ACCENT, left-right - 0x005E: 0xDE, # CIRCUMFLEX ACCENT, right-left - 0x005F: 0x5F, # LOW LINE, left-right - 0x005F: 0xDF, # LOW LINE, right-left - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 
0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET, left-right - 0x007B: 0xFB, # LEFT CURLY BRACKET, right-left - 0x007C: 0x7C, # VERTICAL LINE, left-right - 0x007C: 0xFC, # VERTICAL LINE, right-left - 0x007D: 0x7D, # RIGHT CURLY BRACKET, left-right - 0x007D: 0xFD, # RIGHT CURLY BRACKET, right-left - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0x81, # NO-BREAK SPACE, right-left - 0x00AB: 0x8C, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00BB: 0x98, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH 
ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0x9B, # DIVISION SIGN, right-left - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x060C: 0xAC, # ARABIC COMMA - 0x061B: 0xBB, # ARABIC SEMICOLON - 0x061F: 0xBF, # ARABIC QUESTION MARK - 0x0621: 0xC1, # ARABIC LETTER HAMZA - 0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0xC7, # ARABIC LETTER ALEF - 0x0628: 0xC8, # ARABIC LETTER BEH - 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA - 0x062A: 0xCA, # ARABIC LETTER TEH - 0x062B: 0xCB, # ARABIC LETTER THEH - 0x062C: 0xCC, # ARABIC LETTER JEEM - 0x062D: 0xCD, # ARABIC LETTER HAH - 0x062E: 0xCE, # ARABIC LETTER KHAH - 0x062F: 0xCF, # ARABIC LETTER DAL - 0x0630: 0xD0, # ARABIC LETTER THAL - 0x0631: 0xD1, # ARABIC LETTER REH - 0x0632: 0xD2, # ARABIC LETTER ZAIN - 0x0633: 0xD3, # ARABIC LETTER SEEN - 0x0634: 0xD4, # ARABIC LETTER SHEEN - 0x0635: 0xD5, # ARABIC LETTER SAD - 0x0636: 0xD6, # ARABIC LETTER DAD - 0x0637: 0xD7, # ARABIC LETTER TAH - 0x0638: 0xD8, # ARABIC LETTER ZAH - 0x0639: 0xD9, # ARABIC LETTER AIN - 0x063A: 0xDA, # ARABIC LETTER GHAIN - 0x0640: 0xE0, # ARABIC TATWEEL - 0x0641: 0xE1, # ARABIC LETTER FEH - 0x0642: 0xE2, # ARABIC LETTER QAF - 0x0643: 0xE3, # ARABIC LETTER KAF - 0x0644: 0xE4, # ARABIC LETTER LAM - 0x0645: 0xE5, # ARABIC LETTER MEEM - 0x0646: 0xE6, # ARABIC LETTER NOON - 0x0647: 0xE7, # 
ARABIC LETTER HEH - 0x0648: 0xE8, # ARABIC LETTER WAW - 0x0649: 0xE9, # ARABIC LETTER ALEF MAKSURA - 0x064A: 0xEA, # ARABIC LETTER YEH - 0x064B: 0xEB, # ARABIC FATHATAN - 0x064C: 0xEC, # ARABIC DAMMATAN - 0x064D: 0xED, # ARABIC KASRATAN - 0x064E: 0xEE, # ARABIC FATHA - 0x064F: 0xEF, # ARABIC DAMMA - 0x0650: 0xF0, # ARABIC KASRA - 0x0651: 0xF1, # ARABIC SHADDA - 0x0652: 0xF2, # ARABIC SUKUN - 0x066A: 0xA5, # ARABIC PERCENT SIGN - 0x0679: 0xF4, # ARABIC LETTER TTEH - 0x067E: 0xF3, # ARABIC LETTER PEH - 0x0686: 0xF5, # ARABIC LETTER TCHEH - 0x0688: 0xF9, # ARABIC LETTER DDAL - 0x0691: 0xFA, # ARABIC LETTER RREH - 0x0698: 0xFE, # ARABIC LETTER JEH - 0x06A4: 0xF7, # ARABIC LETTER VEH - 0x06AF: 0xF8, # ARABIC LETTER GAF - 0x06BA: 0x8B, # ARABIC LETTER NOON GHUNNA - 0x06D2: 0xFF, # ARABIC LETTER YEH BARREE - 0x06D5: 0xF6, # ARABIC LETTER AE - 0x06F0: 0xB0, # EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) - 0x06F1: 0xB1, # EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) - 0x06F2: 0xB2, # EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) - 0x06F3: 0xB3, # EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) - 0x06F4: 0xB4, # EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) - 0x06F5: 0xB5, # EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) - 0x06F6: 0xB6, # EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) - 0x06F7: 0xB7, # EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) - 0x06F8: 0xB8, # EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) - 0x06F9: 0xB9, # EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) - 0x2026: 0x93, # HORIZONTAL ELLIPSIS, right-left - 0x274A: 0xC0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left + 0x0000: 0x00, # CONTROL CHARACTER + 0x0001: 0x01, # CONTROL CHARACTER + 0x0002: 0x02, # CONTROL CHARACTER + 0x0003: 0x03, # CONTROL CHARACTER + 0x0004: 0x04, # CONTROL CHARACTER + 0x0005: 0x05, # CONTROL CHARACTER + 0x0006: 0x06, # CONTROL 
CHARACTER + 0x0007: 0x07, # CONTROL CHARACTER + 0x0008: 0x08, # CONTROL CHARACTER + 0x0009: 0x09, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER + 0x0010: 0x10, # CONTROL CHARACTER + 0x0011: 0x11, # CONTROL CHARACTER + 0x0012: 0x12, # CONTROL CHARACTER + 0x0013: 0x13, # CONTROL CHARACTER + 0x0014: 0x14, # CONTROL CHARACTER + 0x0015: 0x15, # CONTROL CHARACTER + 0x0016: 0x16, # CONTROL CHARACTER + 0x0017: 0x17, # CONTROL CHARACTER + 0x0018: 0x18, # CONTROL CHARACTER + 0x0019: 0x19, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER + 0x0020: 0x20, # SPACE, left-right + 0x0020: 0xA0, # SPACE, right-left + 0x0021: 0x21, # EXCLAMATION MARK, left-right + 0x0021: 0xA1, # EXCLAMATION MARK, right-left + 0x0022: 0x22, # QUOTATION MARK, left-right + 0x0022: 0xA2, # QUOTATION MARK, right-left + 0x0023: 0x23, # NUMBER SIGN, left-right + 0x0023: 0xA3, # NUMBER SIGN, right-left + 0x0024: 0x24, # DOLLAR SIGN, left-right + 0x0024: 0xA4, # DOLLAR SIGN, right-left + 0x0025: 0x25, # PERCENT SIGN, left-right + 0x0026: 0x26, # AMPERSAND, left-right + 0x0026: 0xA6, # AMPERSAND, right-left + 0x0027: 0x27, # APOSTROPHE, left-right + 0x0027: 0xA7, # APOSTROPHE, right-left + 0x0028: 0x28, # LEFT PARENTHESIS, left-right + 0x0028: 0xA8, # LEFT PARENTHESIS, right-left + 0x0029: 0x29, # RIGHT PARENTHESIS, left-right + 0x0029: 0xA9, # RIGHT PARENTHESIS, right-left + 0x002A: 0x2A, # ASTERISK, left-right + 0x002A: 0xAA, # ASTERISK, right-left + 0x002B: 0x2B, # PLUS SIGN, left-right + 0x002B: 0xAB, # PLUS SIGN, right-left + 0x002C: 0x2C, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR + 
0x002D: 0x2D, # HYPHEN-MINUS, left-right + 0x002D: 0xAD, # HYPHEN-MINUS, right-left + 0x002E: 0x2E, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR + 0x002E: 0xAE, # FULL STOP, right-left + 0x002F: 0x2F, # SOLIDUS, left-right + 0x002F: 0xAF, # SOLIDUS, right-left + 0x0030: 0x30, # DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE + 0x0032: 0x32, # DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO + 0x0033: 0x33, # DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE + 0x003A: 0x3A, # COLON, left-right + 0x003A: 0xBA, # COLON, right-left + 0x003B: 0x3B, # SEMICOLON, left-right + 0x003C: 0x3C, # LESS-THAN SIGN, left-right + 0x003C: 0xBC, # LESS-THAN SIGN, right-left + 0x003D: 0x3D, # EQUALS SIGN, left-right + 0x003D: 0xBD, # EQUALS SIGN, right-left + 0x003E: 0x3E, # GREATER-THAN SIGN, left-right + 0x003E: 0xBE, # GREATER-THAN SIGN, right-left + 0x003F: 0x3F, # QUESTION MARK, left-right + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, 
# LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET, left-right + 0x005B: 0xDB, # LEFT SQUARE BRACKET, right-left + 0x005C: 0x5C, # REVERSE SOLIDUS, left-right + 0x005C: 0xDC, # REVERSE SOLIDUS, right-left + 0x005D: 0x5D, # RIGHT SQUARE BRACKET, left-right + 0x005D: 0xDD, # RIGHT SQUARE BRACKET, right-left + 0x005E: 0x5E, # CIRCUMFLEX ACCENT, left-right + 0x005E: 0xDE, # CIRCUMFLEX ACCENT, right-left + 0x005F: 0x5F, # LOW LINE, left-right + 0x005F: 0xDF, # LOW LINE, right-left + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN 
SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET, left-right + 0x007B: 0xFB, # LEFT CURLY BRACKET, right-left + 0x007C: 0x7C, # VERTICAL LINE, left-right + 0x007C: 0xFC, # VERTICAL LINE, right-left + 0x007D: 0x7D, # RIGHT CURLY BRACKET, left-right + 0x007D: 0xFD, # RIGHT CURLY BRACKET, right-left + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0x81, # NO-BREAK SPACE, right-left + 0x00AB: 0x8C, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x00BB: 0x98, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F3: 
0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0x9B, # DIVISION SIGN, right-left + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x060C: 0xAC, # ARABIC COMMA + 0x061B: 0xBB, # ARABIC SEMICOLON + 0x061F: 0xBF, # ARABIC QUESTION MARK + 0x0621: 0xC1, # ARABIC LETTER HAMZA + 0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0xC7, # ARABIC LETTER ALEF + 0x0628: 0xC8, # ARABIC LETTER BEH + 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA + 0x062A: 0xCA, # ARABIC LETTER TEH + 0x062B: 0xCB, # ARABIC LETTER THEH + 0x062C: 0xCC, # ARABIC LETTER JEEM + 0x062D: 0xCD, # ARABIC LETTER HAH + 0x062E: 0xCE, # ARABIC LETTER KHAH + 0x062F: 0xCF, # ARABIC LETTER DAL + 0x0630: 0xD0, # ARABIC LETTER THAL + 0x0631: 0xD1, # ARABIC LETTER REH + 0x0632: 0xD2, # ARABIC LETTER ZAIN + 0x0633: 0xD3, # ARABIC LETTER SEEN + 0x0634: 0xD4, # ARABIC LETTER SHEEN + 0x0635: 0xD5, # ARABIC LETTER SAD + 0x0636: 0xD6, # ARABIC LETTER DAD + 0x0637: 0xD7, # ARABIC LETTER TAH + 0x0638: 0xD8, # ARABIC LETTER ZAH + 0x0639: 0xD9, # ARABIC LETTER AIN + 0x063A: 0xDA, # ARABIC LETTER GHAIN + 0x0640: 0xE0, # ARABIC TATWEEL + 0x0641: 0xE1, # ARABIC LETTER FEH + 0x0642: 0xE2, # ARABIC LETTER QAF + 0x0643: 0xE3, # ARABIC LETTER KAF + 0x0644: 0xE4, # ARABIC LETTER LAM + 0x0645: 0xE5, # ARABIC LETTER MEEM + 0x0646: 0xE6, # ARABIC LETTER NOON + 0x0647: 0xE7, # ARABIC LETTER HEH + 0x0648: 0xE8, # ARABIC LETTER WAW + 0x0649: 0xE9, # ARABIC LETTER ALEF MAKSURA + 0x064A: 0xEA, # ARABIC LETTER YEH + 0x064B: 0xEB, # ARABIC FATHATAN + 0x064C: 
0xEC, # ARABIC DAMMATAN + 0x064D: 0xED, # ARABIC KASRATAN + 0x064E: 0xEE, # ARABIC FATHA + 0x064F: 0xEF, # ARABIC DAMMA + 0x0650: 0xF0, # ARABIC KASRA + 0x0651: 0xF1, # ARABIC SHADDA + 0x0652: 0xF2, # ARABIC SUKUN + 0x066A: 0xA5, # ARABIC PERCENT SIGN + 0x0679: 0xF4, # ARABIC LETTER TTEH + 0x067E: 0xF3, # ARABIC LETTER PEH + 0x0686: 0xF5, # ARABIC LETTER TCHEH + 0x0688: 0xF9, # ARABIC LETTER DDAL + 0x0691: 0xFA, # ARABIC LETTER RREH + 0x0698: 0xFE, # ARABIC LETTER JEH + 0x06A4: 0xF7, # ARABIC LETTER VEH + 0x06AF: 0xF8, # ARABIC LETTER GAF + 0x06BA: 0x8B, # ARABIC LETTER NOON GHUNNA + 0x06D2: 0xFF, # ARABIC LETTER YEH BARREE + 0x06D5: 0xF6, # ARABIC LETTER AE + 0x06F0: 0xB0, # EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) + 0x06F1: 0xB1, # EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) + 0x06F2: 0xB2, # EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) + 0x06F3: 0xB3, # EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) + 0x06F4: 0xB4, # EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) + 0x06F5: 0xB5, # EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) + 0x06F6: 0xB6, # EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) + 0x06F7: 0xB7, # EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) + 0x06F8: 0xB8, # EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) + 0x06F9: 0xB9, # EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) + 0x2026: 0x93, # HORIZONTAL ELLIPSIS, right-left + 0x274A: 0xC0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left } - Modified: python/branches/ssize_t/Lib/encodings/mac_greek.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/mac_greek.py (original) +++ python/branches/ssize_t/Lib/encodings/mac_greek.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class 
StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xb9' # 0x81 -> SUPERSCRIPT ONE - u'\xb2' # 0x82 -> SUPERSCRIPT TWO - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xb3' # 0x84 -> SUPERSCRIPT THREE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0385' # 0x87 -> GREEK DIALYTIKA TONOS - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u0384' # 0x8B -> GREEK TONOS - u'\xa8' # 0x8C -> DIAERESIS - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xa3' # 0x92 -> POUND SIGN - u'\u2122' # 0x93 -> TRADE MARK SIGN - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - 
u'\u2022' # 0x96 -> BULLET - u'\xbd' # 0x97 -> VULGAR FRACTION ONE HALF - u'\u2030' # 0x98 -> PER MILLE SIGN - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xa6' # 0x9B -> BROKEN BAR - u'\u20ac' # 0x9C -> EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\u0393' # 0xA1 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xA2 -> GREEK CAPITAL LETTER DELTA - u'\u0398' # 0xA3 -> GREEK CAPITAL LETTER THETA - u'\u039b' # 0xA4 -> GREEK CAPITAL LETTER LAMDA - u'\u039e' # 0xA5 -> GREEK CAPITAL LETTER XI - u'\u03a0' # 0xA6 -> GREEK CAPITAL LETTER PI - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u03a3' # 0xAA -> GREEK CAPITAL LETTER SIGMA - u'\u03aa' # 0xAB -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\xa7' # 0xAC -> SECTION SIGN - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xb0' # 0xAE -> DEGREE SIGN - u'\xb7' # 0xAF -> MIDDLE DOT - u'\u0391' # 0xB0 -> GREEK CAPITAL LETTER ALPHA - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\u0392' # 0xB5 -> GREEK CAPITAL LETTER BETA - u'\u0395' # 0xB6 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xB7 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xB8 -> GREEK CAPITAL LETTER ETA - u'\u0399' # 0xB9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xBA -> GREEK CAPITAL LETTER KAPPA - u'\u039c' # 0xBB -> GREEK CAPITAL LETTER MU - u'\u03a6' # 0xBC -> GREEK CAPITAL LETTER PHI - u'\u03ab' # 0xBD -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03a8' # 0xBE -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xBF -> GREEK CAPITAL LETTER OMEGA - u'\u03ac' # 0xC0 -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u039d' # 0xC1 -> 
GREEK CAPITAL LETTER NU - u'\xac' # 0xC2 -> NOT SIGN - u'\u039f' # 0xC3 -> GREEK CAPITAL LETTER OMICRON - u'\u03a1' # 0xC4 -> GREEK CAPITAL LETTER RHO - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u03a4' # 0xC6 -> GREEK CAPITAL LETTER TAU - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\u03a5' # 0xCB -> GREEK CAPITAL LETTER UPSILON - u'\u03a7' # 0xCC -> GREEK CAPITAL LETTER CHI - u'\u0386' # 0xCD -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\u0388' # 0xCE -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2015' # 0xD1 -> HORIZONTAL BAR - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u0389' # 0xD7 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xD8 -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u038c' # 0xD9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\u038e' # 0xDA -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u03ad' # 0xDB -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xDC -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0xDD -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03cc' # 0xDE -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u038f' # 0xDF -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u03cd' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA - u'\u03c8' # 0xE3 -> GREEK SMALL LETTER PSI - u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON - u'\u03c6' # 0xE6 -> GREEK SMALL LETTER PHI - u'\u03b3' # 0xE7 -> GREEK SMALL LETTER GAMMA - u'\u03b7' # 0xE8 -> GREEK SMALL LETTER ETA - u'\u03b9' # 0xE9 -> GREEK 
SMALL LETTER IOTA - u'\u03be' # 0xEA -> GREEK SMALL LETTER XI - u'\u03ba' # 0xEB -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xEC -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xED -> GREEK SMALL LETTER MU - u'\u03bd' # 0xEE -> GREEK SMALL LETTER NU - u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI - u'\u03ce' # 0xF1 -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u03c1' # 0xF2 -> GREEK SMALL LETTER RHO - u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA - u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU - u'\u03b8' # 0xF5 -> GREEK SMALL LETTER THETA - u'\u03c9' # 0xF6 -> GREEK SMALL LETTER OMEGA - u'\u03c2' # 0xF7 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c7' # 0xF8 -> GREEK SMALL LETTER CHI - u'\u03c5' # 0xF9 -> GREEK SMALL LETTER UPSILON - u'\u03b6' # 0xFA -> GREEK SMALL LETTER ZETA - u'\u03ca' # 0xFB -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xFC -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u0390' # 0xFD -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u03b0' # 0xFE -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\xad' # 0xFF -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 
0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xb9' # 0x81 -> SUPERSCRIPT ONE + u'\xb2' # 0x82 -> SUPERSCRIPT TWO + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xb3' # 0x84 -> SUPERSCRIPT THREE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0385' # 0x87 -> GREEK DIALYTIKA TONOS + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0384' # 0x8B -> GREEK TONOS + u'\xa8' # 0x8C -> DIAERESIS + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xa3' # 0x92 -> POUND SIGN + u'\u2122' # 0x93 -> TRADE MARK SIGN + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u2022' # 0x96 -> BULLET + u'\xbd' # 0x97 -> VULGAR FRACTION ONE HALF + u'\u2030' # 0x98 -> PER MILLE SIGN + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xa6' # 0x9B -> BROKEN BAR + u'\u20ac' # 0x9C -> EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH 
DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\u0393' # 0xA1 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0xA2 -> GREEK CAPITAL LETTER DELTA + u'\u0398' # 0xA3 -> GREEK CAPITAL LETTER THETA + u'\u039b' # 0xA4 -> GREEK CAPITAL LETTER LAMDA + u'\u039e' # 0xA5 -> GREEK CAPITAL LETTER XI + u'\u03a0' # 0xA6 -> GREEK CAPITAL LETTER PI + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u03a3' # 0xAA -> GREEK CAPITAL LETTER SIGMA + u'\u03aa' # 0xAB -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\xa7' # 0xAC -> SECTION SIGN + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xb0' # 0xAE -> DEGREE SIGN + u'\xb7' # 0xAF -> MIDDLE DOT + u'\u0391' # 0xB0 -> GREEK CAPITAL LETTER ALPHA + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\u0392' # 0xB5 -> GREEK CAPITAL LETTER BETA + u'\u0395' # 0xB6 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0xB7 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0xB8 -> GREEK CAPITAL LETTER ETA + u'\u0399' # 0xB9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0xBA -> GREEK CAPITAL LETTER KAPPA + u'\u039c' # 0xBB -> GREEK CAPITAL LETTER MU + u'\u03a6' # 0xBC -> GREEK CAPITAL LETTER PHI + u'\u03ab' # 0xBD -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03a8' # 0xBE -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0xBF -> GREEK CAPITAL LETTER OMEGA + u'\u03ac' # 0xC0 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u039d' # 0xC1 -> GREEK CAPITAL LETTER NU + u'\xac' # 0xC2 -> NOT SIGN + u'\u039f' # 0xC3 -> GREEK CAPITAL LETTER OMICRON + u'\u03a1' # 0xC4 -> GREEK CAPITAL LETTER RHO + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u03a4' # 0xC6 -> GREEK CAPITAL LETTER TAU + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\u03a5' # 0xCB -> GREEK CAPITAL LETTER 
UPSILON + u'\u03a7' # 0xCC -> GREEK CAPITAL LETTER CHI + u'\u0386' # 0xCD -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\u0388' # 0xCE -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2015' # 0xD1 -> HORIZONTAL BAR + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u0389' # 0xD7 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0xD8 -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u038c' # 0xD9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\u038e' # 0xDA -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u03ad' # 0xDB -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xDC -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0xDD -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0xDE -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u038f' # 0xDF -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u03cd' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA + u'\u03c8' # 0xE3 -> GREEK SMALL LETTER PSI + u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON + u'\u03c6' # 0xE6 -> GREEK SMALL LETTER PHI + u'\u03b3' # 0xE7 -> GREEK SMALL LETTER GAMMA + u'\u03b7' # 0xE8 -> GREEK SMALL LETTER ETA + u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA + u'\u03be' # 0xEA -> GREEK SMALL LETTER XI + u'\u03ba' # 0xEB -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0xEC -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0xED -> GREEK SMALL LETTER MU + u'\u03bd' # 0xEE -> GREEK SMALL LETTER NU + u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI + u'\u03ce' # 0xF1 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u03c1' # 0xF2 -> GREEK SMALL LETTER RHO + u'\u03c3' # 0xF3 -> GREEK SMALL LETTER 
SIGMA + u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU + u'\u03b8' # 0xF5 -> GREEK SMALL LETTER THETA + u'\u03c9' # 0xF6 -> GREEK SMALL LETTER OMEGA + u'\u03c2' # 0xF7 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c7' # 0xF8 -> GREEK SMALL LETTER CHI + u'\u03c5' # 0xF9 -> GREEK SMALL LETTER UPSILON + u'\u03b6' # 0xFA -> GREEK SMALL LETTER ZETA + u'\u03ca' # 0xFB -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0xFC -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u0390' # 0xFD -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03b0' # 0xFE -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\xad' # 0xFF -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 
0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 
0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A3: 0x92, # POUND SIGN - 0x00A5: 0xB4, # YEN SIGN - 0x00A6: 0x9B, # BROKEN BAR - 0x00A7: 0xAC, # SECTION SIGN - 0x00A8: 0x8C, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AD: 0xFF, # SOFT HYPHEN # before Mac OS 9.2.2, was undefined - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00B0: 0xAE, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0x82, # SUPERSCRIPT TWO - 0x00B3: 0x84, # SUPERSCRIPT THREE - 0x00B7: 0xAF, # MIDDLE DOT - 0x00B9: 0x81, # SUPERSCRIPT ONE - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BD: 0x97, # VULGAR FRACTION ONE 
HALF - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x0384: 0x8B, # GREEK TONOS - 0x0385: 0x87, # GREEK DIALYTIKA TONOS - 0x0386: 0xCD, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0xCE, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0xD7, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038A: 0xD8, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038C: 0xD9, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038E: 0xDA, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038F: 0xDF, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xFD, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0xB0, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0xB5, # GREEK CAPITAL LETTER BETA - 0x0393: 0xA1, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0xA2, # GREEK CAPITAL LETTER DELTA - 0x0395: 0xB6, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0xB7, # GREEK CAPITAL LETTER ZETA - 0x0397: 0xB8, # GREEK CAPITAL LETTER ETA - 0x0398: 
0xA3, # GREEK CAPITAL LETTER THETA - 0x0399: 0xB9, # GREEK CAPITAL LETTER IOTA - 0x039A: 0xBA, # GREEK CAPITAL LETTER KAPPA - 0x039B: 0xA4, # GREEK CAPITAL LETTER LAMDA - 0x039C: 0xBB, # GREEK CAPITAL LETTER MU - 0x039D: 0xC1, # GREEK CAPITAL LETTER NU - 0x039E: 0xA5, # GREEK CAPITAL LETTER XI - 0x039F: 0xC3, # GREEK CAPITAL LETTER OMICRON - 0x03A0: 0xA6, # GREEK CAPITAL LETTER PI - 0x03A1: 0xC4, # GREEK CAPITAL LETTER RHO - 0x03A3: 0xAA, # GREEK CAPITAL LETTER SIGMA - 0x03A4: 0xC6, # GREEK CAPITAL LETTER TAU - 0x03A5: 0xCB, # GREEK CAPITAL LETTER UPSILON - 0x03A6: 0xBC, # GREEK CAPITAL LETTER PHI - 0x03A7: 0xCC, # GREEK CAPITAL LETTER CHI - 0x03A8: 0xBE, # GREEK CAPITAL LETTER PSI - 0x03A9: 0xBF, # GREEK CAPITAL LETTER OMEGA - 0x03AA: 0xAB, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03AB: 0xBD, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03AC: 0xC0, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03AD: 0xDB, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03AE: 0xDC, # GREEK SMALL LETTER ETA WITH TONOS - 0x03AF: 0xDD, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03B0: 0xFE, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA - 0x03B2: 0xE2, # GREEK SMALL LETTER BETA - 0x03B3: 0xE7, # GREEK SMALL LETTER GAMMA - 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA - 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON - 0x03B6: 0xFA, # GREEK SMALL LETTER ZETA - 0x03B7: 0xE8, # GREEK SMALL LETTER ETA - 0x03B8: 0xF5, # GREEK SMALL LETTER THETA - 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA - 0x03BA: 0xEB, # GREEK SMALL LETTER KAPPA - 0x03BB: 0xEC, # GREEK SMALL LETTER LAMDA - 0x03BC: 0xED, # GREEK SMALL LETTER MU - 0x03BD: 0xEE, # GREEK SMALL LETTER NU - 0x03BE: 0xEA, # GREEK SMALL LETTER XI - 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON - 0x03C0: 0xF0, # GREEK SMALL LETTER PI - 0x03C1: 0xF2, # GREEK SMALL LETTER RHO - 0x03C2: 0xF7, # GREEK SMALL LETTER FINAL SIGMA - 0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA - 0x03C4: 0xF4, # GREEK SMALL LETTER TAU - 
0x03C5: 0xF9, # GREEK SMALL LETTER UPSILON - 0x03C6: 0xE6, # GREEK SMALL LETTER PHI - 0x03C7: 0xF8, # GREEK SMALL LETTER CHI - 0x03C8: 0xE3, # GREEK SMALL LETTER PSI - 0x03C9: 0xF6, # GREEK SMALL LETTER OMEGA - 0x03CA: 0xFB, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03CB: 0xFC, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03CC: 0xDE, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03CD: 0xE0, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03CE: 0xF1, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2013: 0xD0, # EN DASH - 0x2015: 0xD1, # HORIZONTAL BAR - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2022: 0x96, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0x98, # PER MILLE SIGN - 0x20AC: 0x9C, # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN - 0x2122: 0x93, # TRADE MARK SIGN - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x0000: 0x00, # CONTROL CHARACTER + 0x0001: 0x01, # CONTROL CHARACTER + 0x0002: 0x02, # CONTROL CHARACTER + 0x0003: 0x03, # CONTROL CHARACTER + 0x0004: 0x04, # CONTROL CHARACTER + 0x0005: 0x05, # CONTROL CHARACTER + 0x0006: 0x06, # CONTROL CHARACTER + 0x0007: 0x07, # CONTROL CHARACTER + 0x0008: 0x08, # CONTROL CHARACTER + 0x0009: 0x09, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER + 0x0010: 0x10, # CONTROL CHARACTER + 0x0011: 0x11, # CONTROL CHARACTER + 0x0012: 0x12, # CONTROL CHARACTER + 0x0013: 0x13, # CONTROL CHARACTER + 0x0014: 0x14, # CONTROL CHARACTER + 0x0015: 0x15, # CONTROL CHARACTER + 0x0016: 0x16, # CONTROL CHARACTER + 0x0017: 0x17, # CONTROL CHARACTER + 0x0018: 0x18, # CONTROL 
CHARACTER + 0x0019: 0x19, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # 
LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A3: 0x92, # POUND SIGN + 0x00A5: 0xB4, # YEN SIGN + 0x00A6: 0x9B, # BROKEN BAR + 0x00A7: 0xAC, # SECTION SIGN + 0x00A8: 0x8C, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 
0x00AD: 0xFF, # SOFT HYPHEN # before Mac OS 9.2.2, was undefined + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00B0: 0xAE, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0x82, # SUPERSCRIPT TWO + 0x00B3: 0x84, # SUPERSCRIPT THREE + 0x00B7: 0xAF, # MIDDLE DOT + 0x00B9: 0x81, # SUPERSCRIPT ONE + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BD: 0x97, # VULGAR FRACTION ONE HALF + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x0384: 0x8B, # GREEK TONOS + 0x0385: 0x87, # GREEK DIALYTIKA TONOS + 0x0386: 0xCD, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0xCE, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0xD7, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038A: 0xD8, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038C: 0xD9, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038E: 0xDA, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038F: 0xDF, # GREEK CAPITAL LETTER OMEGA WITH 
TONOS + 0x0390: 0xFD, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0xB0, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0xB5, # GREEK CAPITAL LETTER BETA + 0x0393: 0xA1, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0xA2, # GREEK CAPITAL LETTER DELTA + 0x0395: 0xB6, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0xB7, # GREEK CAPITAL LETTER ZETA + 0x0397: 0xB8, # GREEK CAPITAL LETTER ETA + 0x0398: 0xA3, # GREEK CAPITAL LETTER THETA + 0x0399: 0xB9, # GREEK CAPITAL LETTER IOTA + 0x039A: 0xBA, # GREEK CAPITAL LETTER KAPPA + 0x039B: 0xA4, # GREEK CAPITAL LETTER LAMDA + 0x039C: 0xBB, # GREEK CAPITAL LETTER MU + 0x039D: 0xC1, # GREEK CAPITAL LETTER NU + 0x039E: 0xA5, # GREEK CAPITAL LETTER XI + 0x039F: 0xC3, # GREEK CAPITAL LETTER OMICRON + 0x03A0: 0xA6, # GREEK CAPITAL LETTER PI + 0x03A1: 0xC4, # GREEK CAPITAL LETTER RHO + 0x03A3: 0xAA, # GREEK CAPITAL LETTER SIGMA + 0x03A4: 0xC6, # GREEK CAPITAL LETTER TAU + 0x03A5: 0xCB, # GREEK CAPITAL LETTER UPSILON + 0x03A6: 0xBC, # GREEK CAPITAL LETTER PHI + 0x03A7: 0xCC, # GREEK CAPITAL LETTER CHI + 0x03A8: 0xBE, # GREEK CAPITAL LETTER PSI + 0x03A9: 0xBF, # GREEK CAPITAL LETTER OMEGA + 0x03AA: 0xAB, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03AB: 0xBD, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03AC: 0xC0, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03AD: 0xDB, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03AE: 0xDC, # GREEK SMALL LETTER ETA WITH TONOS + 0x03AF: 0xDD, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03B0: 0xFE, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA + 0x03B2: 0xE2, # GREEK SMALL LETTER BETA + 0x03B3: 0xE7, # GREEK SMALL LETTER GAMMA + 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA + 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON + 0x03B6: 0xFA, # GREEK SMALL LETTER ZETA + 0x03B7: 0xE8, # GREEK SMALL LETTER ETA + 0x03B8: 0xF5, # GREEK SMALL LETTER THETA + 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA + 0x03BA: 0xEB, # GREEK SMALL LETTER KAPPA + 0x03BB: 0xEC, # GREEK SMALL 
LETTER LAMDA + 0x03BC: 0xED, # GREEK SMALL LETTER MU + 0x03BD: 0xEE, # GREEK SMALL LETTER NU + 0x03BE: 0xEA, # GREEK SMALL LETTER XI + 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON + 0x03C0: 0xF0, # GREEK SMALL LETTER PI + 0x03C1: 0xF2, # GREEK SMALL LETTER RHO + 0x03C2: 0xF7, # GREEK SMALL LETTER FINAL SIGMA + 0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA + 0x03C4: 0xF4, # GREEK SMALL LETTER TAU + 0x03C5: 0xF9, # GREEK SMALL LETTER UPSILON + 0x03C6: 0xE6, # GREEK SMALL LETTER PHI + 0x03C7: 0xF8, # GREEK SMALL LETTER CHI + 0x03C8: 0xE3, # GREEK SMALL LETTER PSI + 0x03C9: 0xF6, # GREEK SMALL LETTER OMEGA + 0x03CA: 0xFB, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03CB: 0xFC, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03CC: 0xDE, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03CD: 0xE0, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03CE: 0xF1, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2013: 0xD0, # EN DASH + 0x2015: 0xD1, # HORIZONTAL BAR + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2022: 0x96, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2030: 0x98, # PER MILLE SIGN + 0x20AC: 0x9C, # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN + 0x2122: 0x93, # TRADE MARK SIGN + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO } - Modified: python/branches/ssize_t/Lib/encodings/mac_iceland.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/mac_iceland.py (original) +++ python/branches/ssize_t/Lib/encodings/mac_iceland.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding 
Table decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN 
SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xdd' # 0xA0 -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xBE -> LATIN SMALL LETTER AE - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED 
EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\xd0' # 0xDC -> LATIN CAPITAL LETTER ETH - u'\xf0' # 0xDD -> LATIN SMALL LETTER ETH - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN - u'\xfe' # 0xDF -> LATIN SMALL LETTER THORN - u'\xfd' # 0xE0 -> LATIN SMALL LETTER Y WITH ACUTE - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL 
LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL 
CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + 
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH 
DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xdd' # 0xA0 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL 
DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0xBE -> LATIN SMALL LETTER AE + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\xd0' # 0xDC -> LATIN CAPITAL LETTER ETH + u'\xf0' # 0xDD -> LATIN SMALL LETTER ETH + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN + u'\xfe' # 0xDF -> LATIN SMALL LETTER THORN + u'\xfd' # 0xE0 -> LATIN SMALL LETTER Y WITH ACUTE + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION 
MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 
0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL 
LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A1: 0xC1, # INVERTED EXCLAMATION 
MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A5: 0xB4, # YEN SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00AF: 0xF8, # MACRON - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xAB, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00B7: 0xE1, # MIDDLE DOT - 0x00B8: 0xFC, # CEDILLA - 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xC0, # INVERTED QUESTION MARK - 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xDC, # LATIN CAPITAL LETTER ETH - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH 
STROKE - 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xA0, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xBE, # LATIN SMALL LETTER AE - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0xDD, # LATIN SMALL LETTER ETH - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xE0, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0xDF, # LATIN SMALL LETTER THORN - 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0131: 0xF5, # 
LATIN SMALL LETTER DOTLESS I - 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02C7: 0xFF, # CARON - 0x02D8: 0xF9, # BREVE - 0x02D9: 0xFA, # DOT ABOVE - 0x02DA: 0xFB, # RING ABOVE - 0x02DB: 0xFE, # OGONEK - 0x02DC: 0xF7, # SMALL TILDE - 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT - 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA - 0x03C0: 0xB9, # GREEK SMALL LETTER PI - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0xE4, # PER MILLE SIGN - 0x2044: 0xDA, # FRACTION SLASH - 0x20AC: 0xDB, # EURO SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xC6, # INCREMENT - 0x220F: 0xB8, # N-ARY PRODUCT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x222B: 0xBA, # INTEGRAL - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE - 0xF8FF: 0xF0, # Apple logo + 0x0000: 0x00, # CONTROL CHARACTER + 0x0001: 0x01, # CONTROL CHARACTER + 0x0002: 0x02, # CONTROL CHARACTER + 0x0003: 0x03, # CONTROL CHARACTER + 0x0004: 0x04, # CONTROL CHARACTER + 0x0005: 0x05, # CONTROL CHARACTER + 0x0006: 0x06, # CONTROL CHARACTER + 0x0007: 0x07, # CONTROL CHARACTER + 0x0008: 0x08, # CONTROL CHARACTER + 0x0009: 0x09, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 
0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER + 0x0010: 0x10, # CONTROL CHARACTER + 0x0011: 0x11, # CONTROL CHARACTER + 0x0012: 0x12, # CONTROL CHARACTER + 0x0013: 0x13, # CONTROL CHARACTER + 0x0014: 0x14, # CONTROL CHARACTER + 0x0015: 0x15, # CONTROL CHARACTER + 0x0016: 0x16, # CONTROL CHARACTER + 0x0017: 0x17, # CONTROL CHARACTER + 0x0018: 0x18, # CONTROL CHARACTER + 0x0019: 0x19, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 
0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, 
# RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xB4, # YEN SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00AF: 0xF8, # MACRON + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B4: 0xAB, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00B7: 0xE1, # MIDDLE DOT + 0x00B8: 0xFC, # CEDILLA + 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xC0, # INVERTED QUESTION MARK + 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xDC, # LATIN CAPITAL LETTER ETH + 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 
0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xA0, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xBE, # LATIN SMALL LETTER AE + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0xDD, # LATIN SMALL LETTER ETH + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xE0, 
# LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0xDF, # LATIN SMALL LETTER THORN + 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02C7: 0xFF, # CARON + 0x02D8: 0xF9, # BREVE + 0x02D9: 0xFA, # DOT ABOVE + 0x02DA: 0xFB, # RING ABOVE + 0x02DB: 0xFE, # OGONEK + 0x02DC: 0xF7, # SMALL TILDE + 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT + 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA + 0x03C0: 0xB9, # GREEK SMALL LETTER PI + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2030: 0xE4, # PER MILLE SIGN + 0x2044: 0xDA, # FRACTION SLASH + 0x20AC: 0xDB, # EURO SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xC6, # INCREMENT + 0x220F: 0xB8, # N-ARY PRODUCT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x222B: 0xBA, # INTEGRAL + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x25CA: 0xD7, # LOZENGE + 0xF8FF: 0xF0, # Apple logo } - Modified: python/branches/ssize_t/Lib/encodings/mac_roman.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/mac_roman.py (original) +++ python/branches/ssize_t/Lib/encodings/mac_roman.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return 
codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN 
SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xBE -> LATIN SMALL LETTER AE - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 
0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufb01' # 0xDE -> LATIN SMALL LIGATURE FI - u'\ufb02' # 0xDF -> LATIN SMALL LIGATURE FL - u'\u2021' # 0xE0 -> DOUBLE DAGGER - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL 
LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL 
CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + 
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH 
DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> 
N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0xBE -> LATIN SMALL LETTER AE + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufb01' # 0xDE -> LATIN SMALL LIGATURE FI + u'\ufb02' # 0xDF -> LATIN SMALL LIGATURE FL + u'\u2021' # 0xE0 -> DOUBLE DAGGER + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + 
u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, 
# CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER 
M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK - 
0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A5: 0xB4, # YEN SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00AF: 0xF8, # MACRON - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xAB, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00B7: 0xE1, # MIDDLE DOT - 0x00B8: 0xFC, # CEDILLA - 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xC0, # INVERTED QUESTION MARK - 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH 
GRAVE - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xBE, # LATIN SMALL LETTER AE - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02C7: 
0xFF, # CARON - 0x02D8: 0xF9, # BREVE - 0x02D9: 0xFA, # DOT ABOVE - 0x02DA: 0xFB, # RING ABOVE - 0x02DB: 0xFE, # OGONEK - 0x02DC: 0xF7, # SMALL TILDE - 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT - 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA - 0x03C0: 0xB9, # GREEK SMALL LETTER PI - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2021: 0xE0, # DOUBLE DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0xE4, # PER MILLE SIGN - 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x2044: 0xDA, # FRACTION SLASH - 0x20AC: 0xDB, # EURO SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xC6, # INCREMENT - 0x220F: 0xB8, # N-ARY PRODUCT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x222B: 0xBA, # INTEGRAL - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE - 0xF8FF: 0xF0, # Apple logo - 0xFB01: 0xDE, # LATIN SMALL LIGATURE FI - 0xFB02: 0xDF, # LATIN SMALL LIGATURE FL + 0x0000: 0x00, # CONTROL CHARACTER + 0x0001: 0x01, # CONTROL CHARACTER + 0x0002: 0x02, # CONTROL CHARACTER + 0x0003: 0x03, # CONTROL CHARACTER + 0x0004: 0x04, # CONTROL CHARACTER + 0x0005: 0x05, # CONTROL CHARACTER + 0x0006: 0x06, # CONTROL CHARACTER + 0x0007: 0x07, # CONTROL CHARACTER + 0x0008: 0x08, # CONTROL CHARACTER + 0x0009: 0x09, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # 
CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER + 0x0010: 0x10, # CONTROL CHARACTER + 0x0011: 0x11, # CONTROL CHARACTER + 0x0012: 0x12, # CONTROL CHARACTER + 0x0013: 0x13, # CONTROL CHARACTER + 0x0014: 0x14, # CONTROL CHARACTER + 0x0015: 0x15, # CONTROL CHARACTER + 0x0016: 0x16, # CONTROL CHARACTER + 0x0017: 0x17, # CONTROL CHARACTER + 0x0018: 0x18, # CONTROL CHARACTER + 0x0019: 0x19, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # 
LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY 
BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xB4, # YEN SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00AF: 0xF8, # MACRON + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B4: 0xAB, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00B7: 0xE1, # MIDDLE DOT + 0x00B8: 0xFC, # CEDILLA + 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xC0, # INVERTED QUESTION MARK + 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 
0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xBE, # LATIN SMALL LETTER AE + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x0178: 0xD9, # 
LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02C7: 0xFF, # CARON + 0x02D8: 0xF9, # BREVE + 0x02D9: 0xFA, # DOT ABOVE + 0x02DA: 0xFB, # RING ABOVE + 0x02DB: 0xFE, # OGONEK + 0x02DC: 0xF7, # SMALL TILDE + 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT + 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA + 0x03C0: 0xB9, # GREEK SMALL LETTER PI + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2021: 0xE0, # DOUBLE DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2030: 0xE4, # PER MILLE SIGN + 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2044: 0xDA, # FRACTION SLASH + 0x20AC: 0xDB, # EURO SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xC6, # INCREMENT + 0x220F: 0xB8, # N-ARY PRODUCT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x222B: 0xBA, # INTEGRAL + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x25CA: 0xD7, # LOZENGE + 0xF8FF: 0xF0, # Apple logo + 0xFB01: 0xDE, # LATIN SMALL LIGATURE FI + 0xFB02: 0xDF, # LATIN SMALL LIGATURE FL } - Modified: python/branches/ssize_t/Lib/encodings/mac_romanian.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/mac_romanian.py (original) +++ python/branches/ssize_t/Lib/encodings/mac_romanian.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return 
codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN 
SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u0102' # 0xAE -> LATIN CAPITAL LETTER A WITH BREVE - u'\u0218' # 0xAF -> LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\u0103' # 0xBE -> LATIN SMALL LETTER A WITH BREVE - u'\u0219' # 0xBF -> LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - u'\xbf' 
# 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - u'\u021b' # 0xDF -> LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - u'\u2021' # 0xE0 -> DOUBLE DAGGER - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN 
CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' 
# 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 
0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u0102' # 0xAE -> LATIN CAPITAL LETTER A WITH BREVE + u'\u0218' # 0xAF -> LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\u0103' # 0xBE -> LATIN SMALL LETTER A WITH BREVE + u'\u0219' # 0xBF -> LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + 
u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later + u'\u021b' # 0xDF -> LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later + u'\u2021' # 0xE0 -> DOUBLE DAGGER + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 
-> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT 
PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 
0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A5: 0xB4, # YEN SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00AF: 0xF8, # MACRON - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xAB, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00B7: 0xE1, # MIDDLE DOT - 0x00B8: 0xFC, # CEDILLA - 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xC0, # INVERTED QUESTION MARK - 0x00C0: 0xCB, # LATIN 
CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 
0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0102: 0xAE, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xBE, # LATIN SMALL LETTER A WITH BREVE - 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x0218: 0xAF, # LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - 0x0219: 0xBF, # LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - 0x021A: 0xDE, # LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - 0x021B: 0xDF, # LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02C7: 0xFF, # CARON - 0x02D8: 0xF9, # BREVE - 0x02D9: 0xFA, # DOT ABOVE - 0x02DA: 0xFB, # RING ABOVE - 0x02DB: 0xFE, # OGONEK - 0x02DC: 0xF7, # SMALL TILDE - 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT - 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA - 0x03C0: 0xB9, # GREEK SMALL LETTER PI - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE 
QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2021: 0xE0, # DOUBLE DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0xE4, # PER MILLE SIGN - 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x2044: 0xDA, # FRACTION SLASH - 0x20AC: 0xDB, # EURO SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xC6, # INCREMENT - 0x220F: 0xB8, # N-ARY PRODUCT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x222B: 0xBA, # INTEGRAL - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE - 0xF8FF: 0xF0, # Apple logo + 0x0000: 0x00, # CONTROL CHARACTER + 0x0001: 0x01, # CONTROL CHARACTER + 0x0002: 0x02, # CONTROL CHARACTER + 0x0003: 0x03, # CONTROL CHARACTER + 0x0004: 0x04, # CONTROL CHARACTER + 0x0005: 0x05, # CONTROL CHARACTER + 0x0006: 0x06, # CONTROL CHARACTER + 0x0007: 0x07, # CONTROL CHARACTER + 0x0008: 0x08, # CONTROL CHARACTER + 0x0009: 0x09, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER + 0x0010: 0x10, # CONTROL CHARACTER + 0x0011: 0x11, # CONTROL CHARACTER + 0x0012: 0x12, # CONTROL CHARACTER + 0x0013: 0x13, # CONTROL CHARACTER + 0x0014: 0x14, # CONTROL CHARACTER + 0x0015: 0x15, # CONTROL CHARACTER + 0x0016: 0x16, # CONTROL CHARACTER + 0x0017: 0x17, # CONTROL CHARACTER + 0x0018: 0x18, # CONTROL CHARACTER + 0x0019: 0x19, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # 
CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # 
LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xB4, # YEN SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00AF: 0xF8, # MACRON + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN 
+ 0x00B4: 0xAB, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00B7: 0xE1, # MIDDLE DOT + 0x00B8: 0xFC, # CEDILLA + 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xC0, # INVERTED QUESTION MARK + 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING 
ABOVE + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0102: 0xAE, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0xBE, # LATIN SMALL LETTER A WITH BREVE + 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK + 0x0218: 0xAF, # LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + 0x0219: 0xBF, # LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + 0x021A: 0xDE, # LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later + 0x021B: 0xDF, # LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later + 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02C7: 0xFF, # CARON + 0x02D8: 0xF9, # BREVE + 0x02D9: 0xFA, # DOT ABOVE + 0x02DA: 0xFB, # RING ABOVE + 0x02DB: 0xFE, # OGONEK + 0x02DC: 0xF7, # SMALL TILDE + 0x02DD: 0xFD, # DOUBLE 
ACUTE ACCENT + 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA + 0x03C0: 0xB9, # GREEK SMALL LETTER PI + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2021: 0xE0, # DOUBLE DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2030: 0xE4, # PER MILLE SIGN + 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2044: 0xDA, # FRACTION SLASH + 0x20AC: 0xDB, # EURO SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xC6, # INCREMENT + 0x220F: 0xB8, # N-ARY PRODUCT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x222B: 0xBA, # INTEGRAL + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x25CA: 0xD7, # LOZENGE + 0xF8FF: 0xF0, # Apple logo } - Modified: python/branches/ssize_t/Lib/encodings/mac_turkish.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/mac_turkish.py (original) +++ python/branches/ssize_t/Lib/encodings/mac_turkish.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,522 +32,521 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL 
CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 
0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xBE -> LATIN SMALL LETTER AE - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A 
WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u011e' # 0xDA -> LATIN CAPITAL LETTER G WITH BREVE - u'\u011f' # 0xDB -> LATIN SMALL LETTER G WITH BREVE - u'\u0130' # 0xDC -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u0131' # 0xDD -> LATIN SMALL LETTER DOTLESS I - u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u015f' # 0xDF -> LATIN SMALL LETTER S WITH CEDILLA - u'\u2021' # 0xE0 -> DOUBLE DAGGER - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER 
U WITH GRAVE - u'\uf8a0' # 0xF5 -> undefined1 - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' 
# 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + 
u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> 
LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK 
SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0xBE -> LATIN SMALL LETTER AE + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u011e' # 0xDA -> LATIN CAPITAL LETTER G WITH BREVE + u'\u011f' # 0xDB -> LATIN SMALL LETTER G WITH BREVE + u'\u0130' # 0xDC -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u0131' # 0xDD -> LATIN SMALL LETTER DOTLESS I + u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u015f' # 0xDF -> LATIN SMALL LETTER S WITH CEDILLA + u'\u2021' # 0xE0 -> DOUBLE DAGGER + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + 
u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\uf8a0' # 0xF5 -> undefined1 + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER 
- 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 
0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND 
SIGN - 0x00A5: 0xB4, # YEN SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00AF: 0xF8, # MACRON - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xAB, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00B7: 0xE1, # MIDDLE DOT - 0x00B8: 0xFC, # CEDILLA - 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xC0, # INVERTED QUESTION MARK - 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U 
WITH ACUTE - 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xBE, # LATIN SMALL LETTER AE - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011E: 0xDA, # LATIN CAPITAL LETTER G WITH BREVE - 0x011F: 0xDB, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0xDC, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xDD, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA - 
0x015F: 0xDF, # LATIN SMALL LETTER S WITH CEDILLA - 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02C7: 0xFF, # CARON - 0x02D8: 0xF9, # BREVE - 0x02D9: 0xFA, # DOT ABOVE - 0x02DA: 0xFB, # RING ABOVE - 0x02DB: 0xFE, # OGONEK - 0x02DC: 0xF7, # SMALL TILDE - 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT - 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA - 0x03C0: 0xB9, # GREEK SMALL LETTER PI - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2021: 0xE0, # DOUBLE DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0xE4, # PER MILLE SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xC6, # INCREMENT - 0x220F: 0xB8, # N-ARY PRODUCT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x222B: 0xBA, # INTEGRAL - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE - 0xF8A0: 0xF5, # undefined1 - 0xF8FF: 0xF0, # Apple logo + 0x0000: 0x00, # CONTROL CHARACTER + 0x0001: 0x01, # CONTROL CHARACTER + 0x0002: 0x02, # CONTROL CHARACTER + 0x0003: 0x03, # CONTROL CHARACTER + 0x0004: 0x04, # CONTROL CHARACTER + 0x0005: 0x05, # CONTROL CHARACTER + 0x0006: 0x06, # CONTROL CHARACTER + 0x0007: 0x07, # CONTROL CHARACTER + 0x0008: 0x08, # CONTROL CHARACTER + 0x0009: 0x09, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 
0x000F: 0x0F, # CONTROL CHARACTER + 0x0010: 0x10, # CONTROL CHARACTER + 0x0011: 0x11, # CONTROL CHARACTER + 0x0012: 0x12, # CONTROL CHARACTER + 0x0013: 0x13, # CONTROL CHARACTER + 0x0014: 0x14, # CONTROL CHARACTER + 0x0015: 0x15, # CONTROL CHARACTER + 0x0016: 0x16, # CONTROL CHARACTER + 0x0017: 0x17, # CONTROL CHARACTER + 0x0018: 0x18, # CONTROL CHARACTER + 0x0019: 0x19, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J 
+ 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # 
TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xB4, # YEN SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00AF: 0xF8, # MACRON + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B4: 0xAB, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00B7: 0xE1, # MIDDLE DOT + 0x00B8: 0xFC, # CEDILLA + 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xC0, # INVERTED QUESTION MARK + 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL 
LETTER O WITH DIAERESIS + 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xBE, # LATIN SMALL LETTER AE + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011E: 0xDA, # LATIN CAPITAL LETTER G WITH BREVE + 0x011F: 0xDB, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0xDC, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0xDD, # LATIN 
SMALL LETTER DOTLESS I + 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0xDF, # LATIN SMALL LETTER S WITH CEDILLA + 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02C7: 0xFF, # CARON + 0x02D8: 0xF9, # BREVE + 0x02D9: 0xFA, # DOT ABOVE + 0x02DA: 0xFB, # RING ABOVE + 0x02DB: 0xFE, # OGONEK + 0x02DC: 0xF7, # SMALL TILDE + 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT + 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA + 0x03C0: 0xB9, # GREEK SMALL LETTER PI + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2021: 0xE0, # DOUBLE DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2030: 0xE4, # PER MILLE SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xC6, # INCREMENT + 0x220F: 0xB8, # N-ARY PRODUCT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x222B: 0xBA, # INTEGRAL + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x25CA: 0xD7, # LOZENGE + 0xF8A0: 0xF5, # undefined1 + 0xF8FF: 0xF0, # Apple logo } - Modified: python/branches/ssize_t/Lib/encodings/tis_620.py ============================================================================== --- python/branches/ssize_t/Lib/encodings/tis_620.py (original) +++ python/branches/ssize_t/Lib/encodings/tis_620.py Mon Jan 2 16:17:17 2006 @@ -15,7 +15,7 @@ def decode(self,input,errors='strict'): return 
codecs.charmap_decode(input,errors,decoding_table) - + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -32,258 +32,258 @@ ### Decoding Table decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> 
CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> u'\ufffe' - u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING - u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO - u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING - u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN - u'\u0e14' # 0xB4 -> THAI 
CHARACTER DO DEK - u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA - u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN - u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA - u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK - u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xC4 -> THAI CHARACTER RU - u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xC6 -> THAI CHARACTER LU - u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA - u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG - u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II - u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU + u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0xA6 -> THAI CHARACTER KHO 
RAKHANG + u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO + u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING + u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK + u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA + u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA + u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN + u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA + u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0xC4 -> THAI CHARACTER RU + u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING + u'\u0e26' # 0xC6 -> THAI CHARACTER LU + u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA + u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP + u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG + u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A + u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0xD2 -> THAI CHARACTER 
SARA AA + u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I + u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II + u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U + u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' - u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE - u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN - u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO - u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN - u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN - u'\u0e50' # 0xF0 -> THAI DIGIT ZERO - u'\u0e51' # 0xF1 -> THAI DIGIT ONE - u'\u0e52' # 0xF2 -> THAI DIGIT TWO - u'\u0e53' # 0xF3 -> THAI DIGIT THREE - u'\u0e54' # 0xF4 -> THAI DIGIT FOUR - u'\u0e55' # 0xF5 -> THAI DIGIT FIVE - u'\u0e56' # 0xF6 -> THAI DIGIT SIX - u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xF9 -> THAI DIGIT NINE - u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT + u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E + u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O + u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0xE5 -> THAI 
CHARACTER LAKKHANGYAO + u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN + u'\u0e50' # 0xF0 -> THAI DIGIT ZERO + u'\u0e51' # 0xF1 -> THAI DIGIT ONE + u'\u0e52' # 0xF2 -> THAI DIGIT TWO + u'\u0e53' # 0xF3 -> THAI DIGIT THREE + u'\u0e54' # 0xF4 -> THAI DIGIT FOUR + u'\u0e55' # 0xF5 -> THAI DIGIT FIVE + u'\u0e56' # 0xF6 -> THAI DIGIT SIX + u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN + u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT + u'\u0e59' # 0xF9 -> THAI DIGIT NINE + u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT u'\ufffe' u'\ufffe' u'\ufffe' @@ -293,252 +293,251 @@ ### Encoding Map encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # 
GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL 
LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # - 0x0081: 0x81, # - 0x0082: 0x82, # - 0x0083: 0x83, # - 0x0084: 0x84, # - 0x0085: 0x85, # - 0x0086: 0x86, # - 0x0087: 0x87, # - 0x0088: 0x88, # - 0x0089: 0x89, # - 0x008A: 0x8A, # - 0x008B: 0x8B, # - 0x008C: 0x8C, # - 0x008D: 0x8D, # - 0x008E: 0x8E, # - 0x008F: 0x8F, # - 0x0090: 0x90, # - 0x0091: 0x91, # - 0x0092: 0x92, # - 0x0093: 0x93, # - 0x0094: 0x94, # - 0x0095: 0x95, # - 0x0096: 0x96, # - 0x0097: 0x97, # - 0x0098: 0x98, # - 0x0099: 0x99, # - 0x009A: 0x9A, # - 
0x009B: 0x9B, # - 0x009C: 0x9C, # - 0x009D: 0x9D, # - 0x009E: 0x9E, # - 0x009F: 0x9F, # - 0x0E01: 0xA1, # THAI CHARACTER KO KAI - 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI - 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT - 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI - 0x0E05: 0xA5, # THAI CHARACTER KHO KHON - 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG - 0x0E07: 0xA7, # THAI CHARACTER NGO NGU - 0x0E08: 0xA8, # THAI CHARACTER CHO CHAN - 0x0E09: 0xA9, # THAI CHARACTER CHO CHING - 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG - 0x0E0B: 0xAB, # THAI CHARACTER SO SO - 0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE - 0x0E0D: 0xAD, # THAI CHARACTER YO YING - 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA - 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK - 0x0E10: 0xB0, # THAI CHARACTER THO THAN - 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO - 0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO - 0x0E13: 0xB3, # THAI CHARACTER NO NEN - 0x0E14: 0xB4, # THAI CHARACTER DO DEK - 0x0E15: 0xB5, # THAI CHARACTER TO TAO - 0x0E16: 0xB6, # THAI CHARACTER THO THUNG - 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN - 0x0E18: 0xB8, # THAI CHARACTER THO THONG - 0x0E19: 0xB9, # THAI CHARACTER NO NU - 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI - 0x0E1B: 0xBB, # THAI CHARACTER PO PLA - 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG - 0x0E1D: 0xBD, # THAI CHARACTER FO FA - 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN - 0x0E1F: 0xBF, # THAI CHARACTER FO FAN - 0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO - 0x0E21: 0xC1, # THAI CHARACTER MO MA - 0x0E22: 0xC2, # THAI CHARACTER YO YAK - 0x0E23: 0xC3, # THAI CHARACTER RO RUA - 0x0E24: 0xC4, # THAI CHARACTER RU - 0x0E25: 0xC5, # THAI CHARACTER LO LING - 0x0E26: 0xC6, # THAI CHARACTER LU - 0x0E27: 0xC7, # THAI CHARACTER WO WAEN - 0x0E28: 0xC8, # THAI CHARACTER SO SALA - 0x0E29: 0xC9, # THAI CHARACTER SO RUSI - 0x0E2A: 0xCA, # THAI CHARACTER SO SUA - 0x0E2B: 0xCB, # THAI CHARACTER HO HIP - 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA - 0x0E2D: 0xCD, # THAI CHARACTER O ANG - 0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK - 
0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI - 0x0E30: 0xD0, # THAI CHARACTER SARA A - 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT - 0x0E32: 0xD2, # THAI CHARACTER SARA AA - 0x0E33: 0xD3, # THAI CHARACTER SARA AM - 0x0E34: 0xD4, # THAI CHARACTER SARA I - 0x0E35: 0xD5, # THAI CHARACTER SARA II - 0x0E36: 0xD6, # THAI CHARACTER SARA UE - 0x0E37: 0xD7, # THAI CHARACTER SARA UEE - 0x0E38: 0xD8, # THAI CHARACTER SARA U - 0x0E39: 0xD9, # THAI CHARACTER SARA UU - 0x0E3A: 0xDA, # THAI CHARACTER PHINTHU - 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT - 0x0E40: 0xE0, # THAI CHARACTER SARA E - 0x0E41: 0xE1, # THAI CHARACTER SARA AE - 0x0E42: 0xE2, # THAI CHARACTER SARA O - 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN - 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI - 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO - 0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK - 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU - 0x0E48: 0xE8, # THAI CHARACTER MAI EK - 0x0E49: 0xE9, # THAI CHARACTER MAI THO - 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI - 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA - 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT - 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT - 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN - 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN - 0x0E50: 0xF0, # THAI DIGIT ZERO - 0x0E51: 0xF1, # THAI DIGIT ONE - 0x0E52: 0xF2, # THAI DIGIT TWO - 0x0E53: 0xF3, # THAI DIGIT THREE - 0x0E54: 0xF4, # THAI DIGIT FOUR - 0x0E55: 0xF5, # THAI DIGIT FIVE - 0x0E56: 0xF6, # THAI DIGIT SIX - 0x0E57: 0xF7, # THAI DIGIT SEVEN - 0x0E58: 0xF8, # THAI DIGIT EIGHT - 0x0E59: 0xF9, # THAI DIGIT NINE - 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU - 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, 
# VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 
0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 
0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x0E01: 0xA1, # THAI CHARACTER KO KAI + 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI + 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT + 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI + 0x0E05: 0xA5, # THAI CHARACTER KHO KHON + 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG + 0x0E07: 0xA7, # THAI CHARACTER NGO NGU + 0x0E08: 0xA8, # THAI CHARACTER CHO CHAN + 0x0E09: 0xA9, # THAI CHARACTER CHO CHING + 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG + 0x0E0B: 0xAB, # THAI CHARACTER SO SO + 0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE + 0x0E0D: 0xAD, # THAI CHARACTER YO YING + 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA + 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK + 0x0E10: 0xB0, # THAI CHARACTER THO THAN + 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO + 0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO + 0x0E13: 0xB3, # THAI CHARACTER NO NEN + 0x0E14: 0xB4, # THAI CHARACTER DO DEK + 0x0E15: 0xB5, # THAI CHARACTER TO TAO + 0x0E16: 0xB6, # THAI CHARACTER THO THUNG + 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN + 0x0E18: 0xB8, # THAI CHARACTER THO THONG + 0x0E19: 0xB9, # THAI CHARACTER NO NU + 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI + 0x0E1B: 0xBB, # THAI CHARACTER PO PLA + 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG + 0x0E1D: 0xBD, # THAI CHARACTER FO FA + 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN + 0x0E1F: 0xBF, # THAI CHARACTER FO FAN + 
0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO + 0x0E21: 0xC1, # THAI CHARACTER MO MA + 0x0E22: 0xC2, # THAI CHARACTER YO YAK + 0x0E23: 0xC3, # THAI CHARACTER RO RUA + 0x0E24: 0xC4, # THAI CHARACTER RU + 0x0E25: 0xC5, # THAI CHARACTER LO LING + 0x0E26: 0xC6, # THAI CHARACTER LU + 0x0E27: 0xC7, # THAI CHARACTER WO WAEN + 0x0E28: 0xC8, # THAI CHARACTER SO SALA + 0x0E29: 0xC9, # THAI CHARACTER SO RUSI + 0x0E2A: 0xCA, # THAI CHARACTER SO SUA + 0x0E2B: 0xCB, # THAI CHARACTER HO HIP + 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA + 0x0E2D: 0xCD, # THAI CHARACTER O ANG + 0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK + 0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI + 0x0E30: 0xD0, # THAI CHARACTER SARA A + 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT + 0x0E32: 0xD2, # THAI CHARACTER SARA AA + 0x0E33: 0xD3, # THAI CHARACTER SARA AM + 0x0E34: 0xD4, # THAI CHARACTER SARA I + 0x0E35: 0xD5, # THAI CHARACTER SARA II + 0x0E36: 0xD6, # THAI CHARACTER SARA UE + 0x0E37: 0xD7, # THAI CHARACTER SARA UEE + 0x0E38: 0xD8, # THAI CHARACTER SARA U + 0x0E39: 0xD9, # THAI CHARACTER SARA UU + 0x0E3A: 0xDA, # THAI CHARACTER PHINTHU + 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT + 0x0E40: 0xE0, # THAI CHARACTER SARA E + 0x0E41: 0xE1, # THAI CHARACTER SARA AE + 0x0E42: 0xE2, # THAI CHARACTER SARA O + 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN + 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI + 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO + 0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK + 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU + 0x0E48: 0xE8, # THAI CHARACTER MAI EK + 0x0E49: 0xE9, # THAI CHARACTER MAI THO + 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI + 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA + 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT + 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT + 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN + 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN + 0x0E50: 0xF0, # THAI DIGIT ZERO + 0x0E51: 0xF1, # THAI DIGIT ONE + 0x0E52: 0xF2, # THAI DIGIT TWO + 0x0E53: 0xF3, # THAI DIGIT THREE + 0x0E54: 0xF4, # THAI DIGIT FOUR 
+ 0x0E55: 0xF5, # THAI DIGIT FIVE + 0x0E56: 0xF6, # THAI DIGIT SIX + 0x0E57: 0xF7, # THAI DIGIT SEVEN + 0x0E58: 0xF8, # THAI DIGIT EIGHT + 0x0E59: 0xF9, # THAI DIGIT NINE + 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU + 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT } - Modified: python/branches/ssize_t/Lib/macurl2path.py ============================================================================== --- python/branches/ssize_t/Lib/macurl2path.py (original) +++ python/branches/ssize_t/Lib/macurl2path.py Mon Jan 2 16:17:17 2006 @@ -8,7 +8,8 @@ __all__ = ["url2pathname","pathname2url"] def url2pathname(pathname): - "Convert /-delimited pathname to mac pathname" + """OS-specific conversion from a relative URL of the 'file' scheme + to a file system path; not recommended for general use.""" # # XXXX The .. handling should be fixed... # @@ -49,7 +50,8 @@ return urllib.unquote(rv) def pathname2url(pathname): - "convert mac pathname to /-delimited pathname" + """OS-specific conversion from a file system path to a relative URL + of the 'file' scheme; not recommended for general use.""" if '/' in pathname: raise RuntimeError, "Cannot convert pathname containing slashes" components = pathname.split(':') Modified: python/branches/ssize_t/Lib/nturl2path.py ============================================================================== --- python/branches/ssize_t/Lib/nturl2path.py (original) +++ python/branches/ssize_t/Lib/nturl2path.py Mon Jan 2 16:17:17 2006 @@ -1,14 +1,12 @@ """Convert a NT pathname to a file URL and vice versa.""" def url2pathname(url): - r"""Convert a URL to a DOS path. - - ///C|/foo/bar/spam.foo - - becomes - - C:\foo\bar\spam.foo - """ + """OS-specific conversion from a relative URL of the 'file' scheme + to a file system path; not recommended for general use.""" + # e.g. + # ///C|/foo/bar/spam.foo + # becomes + # C:\foo\bar\spam.foo import string, urllib # Windows itself uses ":" even in URLs. 
url = url.replace(':', '|') @@ -35,15 +33,12 @@ return path def pathname2url(p): - r"""Convert a DOS path name to a file url. - - C:\foo\bar\spam.foo - - becomes - - ///C|/foo/bar/spam.foo - """ - + """OS-specific conversion from a file system path to a relative URL + of the 'file' scheme; not recommended for general use.""" + # e.g. + # C:\foo\bar\spam.foo + # becomes + # ///C|/foo/bar/spam.foo import urllib if not ':' in p: # No drive specifier, just convert slashes and quote the name Modified: python/branches/ssize_t/Lib/plat-riscos/rourl2path.py ============================================================================== --- python/branches/ssize_t/Lib/plat-riscos/rourl2path.py (original) +++ python/branches/ssize_t/Lib/plat-riscos/rourl2path.py Mon Jan 2 16:17:17 2006 @@ -11,7 +11,8 @@ __slash_dot = string.maketrans("/.", "./") def url2pathname(url): - "Convert URL to a RISC OS path." + """OS-specific conversion from a relative URL of the 'file' scheme + to a file system path; not recommended for general use.""" tp = urllib.splittype(url)[0] if tp and tp <> 'file': raise RuntimeError, 'Cannot convert non-local URL to pathname' @@ -46,7 +47,8 @@ return '.'.join(components) def pathname2url(pathname): - "Convert a RISC OS path name to a file url." 
+ """OS-specific conversion from a file system path to a relative URL + of the 'file' scheme; not recommended for general use.""" return urllib.quote('///' + pathname.translate(__slash_dot), "/$:") def test(): Modified: python/branches/ssize_t/Lib/platform.py ============================================================================== --- python/branches/ssize_t/Lib/platform.py (original) +++ python/branches/ssize_t/Lib/platform.py Mon Jan 2 16:17:17 2006 @@ -1092,7 +1092,7 @@ ### Various APIs for extracting information from sys.version _sys_version_parser = re.compile(r'([\w.+]+)\s*' - '\(#(\d+),\s*([\w ]+),\s*([\w :]+)\)\s*' + '\(#?(\d+:?\d+M?),\s*([\w ]+),\s*([\w :]+)\)\s*' '\[([^\]]+)\]?') _sys_version_cache = None @@ -1114,7 +1114,6 @@ return _sys_version_cache version, buildno, builddate, buildtime, compiler = \ _sys_version_parser.match(sys.version).groups() - buildno = int(buildno) builddate = builddate + ' ' + buildtime l = string.split(version, '.') if len(l) == 2: Modified: python/branches/ssize_t/Lib/test/test__locale.py ============================================================================== --- python/branches/ssize_t/Lib/test/test__locale.py (original) +++ python/branches/ssize_t/Lib/test/test__locale.py Mon Jan 2 16:17:17 2006 @@ -97,6 +97,18 @@ loc, set_locale)) + def test_float_parsing(self): + # Bug #1391872: Test whether float parsing is okay on European + # locales. 
+ for loc in candidate_locales: + try: + setlocale(LC_NUMERIC, loc) + except Error: + continue + self.assertEquals(int(eval('3.14') * 100), 314) + self.assertEquals(int(float('3.14') * 100), 314) + + def test_main(): run_unittest(_LocaleTests) Modified: python/branches/ssize_t/Lib/test/test_builtin.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_builtin.py (original) +++ python/branches/ssize_t/Lib/test/test_builtin.py Mon Jan 2 16:17:17 2006 @@ -545,6 +545,8 @@ self.assertEqual(float(314), 314.0) self.assertEqual(float(314L), 314.0) self.assertEqual(float(" 3.14 "), 3.14) + self.assertRaises(ValueError, float, " 0x3.1 ") + self.assertRaises(ValueError, float, " -0x3.p-1 ") if have_unicode: self.assertEqual(float(unicode(" 3.14 ")), 3.14) self.assertEqual(float(unicode(" \u0663.\u0661\u0664 ",'raw-unicode-escape')), 3.14) @@ -572,8 +574,8 @@ self.assertEqual(float(" 3,14 "), 3.14) self.assertEqual(float(" +3,14 "), 3.14) self.assertEqual(float(" -3,14 "), -3.14) - self.assertEqual(float(" 0x3.1 "), 3.0625) - self.assertEqual(float(" -0x3.p-1 "), -1.5) + self.assertRaises(ValueError, float, " 0x3.1 ") + self.assertRaises(ValueError, float, " -0x3.p-1 ") self.assertEqual(float(" 25.e-1 "), 2.5) self.assertEqual(fcmp(float(" .25e-1 "), .025), 0) finally: Modified: python/branches/ssize_t/Lib/test/test_code.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_code.py (original) +++ python/branches/ssize_t/Lib/test/test_code.py Mon Jan 2 16:17:17 2006 @@ -33,7 +33,7 @@ ... b = x - y ... c = a * b ... return c -... +... 
>>> dump(h.func_code) name: h argcount: 2 Modified: python/branches/ssize_t/Lib/test/test_coding.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_coding.py (original) +++ python/branches/ssize_t/Lib/test/test_coding.py Mon Jan 2 16:17:17 2006 @@ -5,6 +5,13 @@ class CodingTest(unittest.TestCase): def test_bad_coding(self): module_name = 'bad_coding' + self.verify_bad_module(module_name) + + def test_bad_coding2(self): + module_name = 'bad_coding2' + self.verify_bad_module(module_name) + + def verify_bad_module(self, module_name): self.assertRaises(SyntaxError, __import__, 'test.' + module_name) path = os.path.dirname(__file__) Modified: python/branches/ssize_t/Lib/test/test_cookielib.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_cookielib.py (original) +++ python/branches/ssize_t/Lib/test/test_cookielib.py Mon Jan 2 16:17:17 2006 @@ -248,6 +248,31 @@ except OSError: pass self.assertEqual(c._cookies["www.acme.com"]["/"]["boo"].value, None) + def test_bad_magic(self): + from cookielib import LWPCookieJar, MozillaCookieJar, LoadError + # IOErrors (eg. file doesn't exist) are allowed to propagate + filename = test_support.TESTFN + for cookiejar_class in LWPCookieJar, MozillaCookieJar: + c = cookiejar_class() + try: + c.load(filename="for this test to work, a file with this " + "filename should not exist") + except IOError, exc: + # exactly IOError, not LoadError + self.assertEqual(exc.__class__, IOError) + else: + self.fail("expected IOError for invalid filename") + # Invalid contents of cookies file (eg. bad magic string) + # causes a LoadError. 
+ try: + f = open(filename, "w") + f.write("oops\n") + for cookiejar_class in LWPCookieJar, MozillaCookieJar: + c = cookiejar_class() + self.assertRaises(LoadError, c.load, filename) + finally: + try: os.unlink(filename) + except OSError: pass class CookieTests(TestCase): # XXX @@ -361,6 +386,39 @@ self.assertEquals(interact_netscape(c, "http://www.acme.com/foo/"), '"spam"; eggs') + def test_rfc2109_handling(self): + # RFC 2109 cookies are handled as RFC 2965 or Netscape cookies, + # dependent on policy settings + from cookielib import CookieJar, DefaultCookiePolicy + + for rfc2109_as_netscape, rfc2965, version in [ + # default according to rfc2965 if not explicitly specified + (None, False, 0), + (None, True, 1), + # explicit rfc2109_as_netscape + (False, False, None), # version None here means no cookie stored + (False, True, 1), + (True, False, 0), + (True, True, 0), + ]: + policy = DefaultCookiePolicy( + rfc2109_as_netscape=rfc2109_as_netscape, + rfc2965=rfc2965) + c = CookieJar(policy) + interact_netscape(c, "http://www.example.com/", "ni=ni; Version=1") + try: + cookie = c._cookies["www.example.com"]["/"]["ni"] + except KeyError: + self.assert_(version is None) # didn't expect a stored cookie + else: + self.assertEqual(cookie.version, version) + # 2965 cookies are unaffected + interact_2965(c, "http://www.example.com/", + "foo=bar; Version=1") + if rfc2965: + cookie2965 = c._cookies["www.example.com"]["/"]["foo"] + self.assertEqual(cookie2965.version, 1) + def test_ns_parser(self): from cookielib import CookieJar, DEFAULT_HTTP_PORT Modified: python/branches/ssize_t/Lib/test/test_csv.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_csv.py (original) +++ python/branches/ssize_t/Lib/test/test_csv.py Mon Jan 2 16:17:17 2006 @@ -836,7 +836,6 @@ 'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow' 'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back' 
""" - header = '''\ "venue","city","state","date","performers" ''' @@ -852,6 +851,10 @@ 47483648;43.0;170;abc;def ''' + sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n" + sample6 = "a|b|c\r\nd|e|f\r\n" + sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n" + def test_has_header(self): sniffer = csv.Sniffer() self.assertEqual(sniffer.has_header(self.sample1), False) @@ -879,6 +882,13 @@ self.assertEqual(dialect.delimiter, "/") dialect = sniffer.sniff(self.sample4) self.assertEqual(dialect.delimiter, ";") + dialect = sniffer.sniff(self.sample5) + self.assertEqual(dialect.delimiter, "\t") + dialect = sniffer.sniff(self.sample6) + self.assertEqual(dialect.delimiter, "|") + dialect = sniffer.sniff(self.sample7) + self.assertEqual(dialect.delimiter, "|") + self.assertEqual(dialect.quotechar, "'") if not hasattr(sys, "gettotalrefcount"): if test_support.verbose: print "*** skipping leakage tests ***" Modified: python/branches/ssize_t/Lib/test/test_descr.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_descr.py (original) +++ python/branches/ssize_t/Lib/test/test_descr.py Mon Jan 2 16:17:17 2006 @@ -1635,6 +1635,37 @@ vereq(X.__mro__, (object, A, C, B, D, X)) vereq(X().f(), "A") + try: + class X(object): + class __metaclass__(type): + def mro(self): + return [self, dict, object] + except TypeError: + pass + else: + raise TestFailed, "devious mro() return not caught" + + try: + class X(object): + class __metaclass__(type): + def mro(self): + return [1] + except TypeError: + pass + else: + raise TestFailed, "non-class mro() return not caught" + + try: + class X(object): + class __metaclass__(type): + def mro(self): + return 1 + except TypeError: + pass + else: + raise TestFailed, "non-sequence mro() return not caught" + + def overloading(): if verbose: print "Testing operator overloading..." 
@@ -3990,6 +4021,77 @@ verify(l.__add__.__objclass__ is list) vereq(l.__add__.__doc__, list.__add__.__doc__) +def notimplemented(): + # all binary methods should be able to return a NotImplemented + if verbose: + print "Testing NotImplemented..." + + import sys + import types + import operator + + def specialmethod(self, other): + return NotImplemented + + def check(expr, x, y): + try: + exec expr in {'x': x, 'y': y, 'operator': operator} + except TypeError: + pass + else: + raise TestFailed("no TypeError from %r" % (expr,)) + + N1 = sys.maxint + 1L # might trigger OverflowErrors instead of TypeErrors + N2 = sys.maxint # if sizeof(int) < sizeof(long), might trigger + # ValueErrors instead of TypeErrors + for metaclass in [type, types.ClassType]: + for name, expr, iexpr in [ + ('__add__', 'x + y', 'x += y'), + ('__sub__', 'x - y', 'x -= y'), + ('__mul__', 'x * y', 'x *= y'), + ('__truediv__', 'operator.truediv(x, y)', None), + ('__floordiv__', 'operator.floordiv(x, y)', None), + ('__div__', 'x / y', 'x /= y'), + ('__mod__', 'x % y', 'x %= y'), + ('__divmod__', 'divmod(x, y)', None), + ('__pow__', 'x ** y', 'x **= y'), + ('__lshift__', 'x << y', 'x <<= y'), + ('__rshift__', 'x >> y', 'x >>= y'), + ('__and__', 'x & y', 'x &= y'), + ('__or__', 'x | y', 'x |= y'), + ('__xor__', 'x ^ y', 'x ^= y'), + ('__coerce__', 'coerce(x, y)', None)]: + if name == '__coerce__': + rname = name + else: + rname = '__r' + name[2:] + A = metaclass('A', (), {name: specialmethod}) + B = metaclass('B', (), {rname: specialmethod}) + a = A() + b = B() + check(expr, a, a) + check(expr, a, b) + check(expr, b, a) + check(expr, b, b) + check(expr, a, N1) + check(expr, a, N2) + check(expr, N1, b) + check(expr, N2, b) + if iexpr: + check(iexpr, a, a) + check(iexpr, a, b) + check(iexpr, b, a) + check(iexpr, b, b) + check(iexpr, a, N1) + check(iexpr, a, N2) + iname = '__i' + name[2:] + C = metaclass('C', (), {iname: specialmethod}) + c = C() + check(iexpr, c, a) + check(iexpr, c, b) + check(iexpr, c, 
N1) + check(iexpr, c, N2) + def test_main(): weakref_segfault() # Must be first, somehow do_this_first() @@ -4084,6 +4186,7 @@ vicious_descriptor_nonsense() test_init() methodwrapper() + notimplemented() if verbose: print "All OK" Modified: python/branches/ssize_t/Lib/test/test_dis.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_dis.py (original) +++ python/branches/ssize_t/Lib/test/test_dis.py Mon Jan 2 16:17:17 2006 @@ -46,6 +46,43 @@ bug708901.func_code.co_firstlineno + 2, bug708901.func_code.co_firstlineno + 3) + +def bug1333982(x=[]): + assert 0, ([s for s in x] + + 1) + pass + +dis_bug1333982 = """\ + %-4d 0 LOAD_CONST 1 (0) + 3 JUMP_IF_TRUE 47 (to 53) + 6 POP_TOP + 7 LOAD_GLOBAL 0 (AssertionError) + 10 BUILD_LIST 0 + 13 DUP_TOP + 14 LOAD_ATTR 1 (append) + 17 STORE_FAST 1 (_[1]) + 20 LOAD_FAST 0 (x) + 23 GET_ITER + >> 24 FOR_ITER 16 (to 43) + 27 STORE_FAST 2 (s) + 30 LOAD_FAST 1 (_[1]) + 33 LOAD_FAST 2 (s) + 36 CALL_FUNCTION 1 + 39 POP_TOP + 40 JUMP_ABSOLUTE 24 + >> 43 DELETE_FAST 1 (_[1]) + + %-4d 46 LOAD_CONST 2 (1) + 49 BINARY_ADD + 50 RAISE_VARARGS 2 + >> 53 POP_TOP + + %-4d 54 LOAD_CONST 0 (None) + 57 RETURN_VALUE +"""%(bug1333982.func_code.co_firstlineno + 1, + bug1333982.func_code.co_firstlineno + 2, + bug1333982.func_code.co_firstlineno + 3) + class DisTests(unittest.TestCase): def do_disassembly_test(self, func, expected): s = StringIO.StringIO() @@ -83,6 +120,12 @@ def test_bug_708901(self): self.do_disassembly_test(bug708901, dis_bug708901) + def test_bug_1333982(self): + # This one is checking bytecodes generated for an `assert` statement, + # so fails if the tests are run with -O. Skip this test then. 
+ if __debug__: + self.do_disassembly_test(bug1333982, dis_bug1333982) + def test_main(): run_unittest(DisTests) Modified: python/branches/ssize_t/Lib/test/test_generators.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_generators.py (original) +++ python/branches/ssize_t/Lib/test/test_generators.py Mon Jan 2 16:17:17 2006 @@ -774,7 +774,7 @@ ... try: ... 1//0 ... except ZeroDivisionError: -... yield 666 +... yield 666 ... except: ... pass ... finally: Modified: python/branches/ssize_t/Lib/test/test_locale.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_locale.py (original) +++ python/branches/ssize_t/Lib/test/test_locale.py Mon Jan 2 16:17:17 2006 @@ -38,13 +38,15 @@ print "yes" try: - testformat("%f", 1024, grouping=1, output='1,024.000000') + # On Solaris 10, the thousands_sep is the empty string + sep = locale.localeconv()['thousands_sep'] + testformat("%f", 1024, grouping=1, output='1%s024.000000' % sep) testformat("%f", 102, grouping=1, output='102.000000') testformat("%f", -42, grouping=1, output='-42.000000') testformat("%+f", -42, grouping=1, output='-42.000000') testformat("%20.f", -42, grouping=1, output=' -42') - testformat("%+10.f", -4200, grouping=1, output=' -4,200') - testformat("%-10.f", 4200, grouping=1, output='4,200 ') + testformat("%+10.f", -4200, grouping=1, output=' -4%s200' % sep) + testformat("%-10.f", 4200, grouping=1, output='4%s200 ' % sep) # Invoke getpreferredencoding to make sure it does not cause exceptions, locale.getpreferredencoding() finally: @@ -65,6 +67,9 @@ print "yes" try: + if sys.platform == 'sunos5': + # On Solaris, in en_US.UTF-8, \xa0 is a space + raise locale.Error oldlocale = locale.setlocale(locale.LC_CTYPE) locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8') except locale.Error: Modified: python/branches/ssize_t/Lib/test/test_logging.py 
============================================================================== --- python/branches/ssize_t/Lib/test/test_logging.py (original) +++ python/branches/ssize_t/Lib/test/test_logging.py Mon Jan 2 16:17:17 2006 @@ -487,11 +487,20 @@ # or a Mac OS X box which supports very little locale stuff at all original_locale = None + # Save and restore the original root logger level across the tests. + # Otherwise, e.g., if any test using cookielib runs after test_logging, + # cookielib's debug-level logger tries to log messages, leading to + # confusing: + # No handlers could be found for logger "cookielib" + # output while the tests are running. + root_logger = logging.getLogger("") + original_logging_level = root_logger.getEffectiveLevel() try: test_main_inner() finally: if original_locale is not None: locale.setlocale(locale.LC_ALL, original_locale) + root_logger.setLevel(original_logging_level) if __name__ == "__main__": sys.stdout.write("test_logging\n") Modified: python/branches/ssize_t/Lib/test/test_minidom.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_minidom.py (original) +++ python/branches/ssize_t/Lib/test/test_minidom.py Mon Jan 2 16:17:17 2006 @@ -1137,9 +1137,9 @@ # Check that replacing a child with itself leaves the tree unchanged elem.replaceChild(e, e) confirm(e.parentNode is elem, "After replaceChild()") - - - + + + def testReplaceWholeText(): def setup(): doc = parseString("ad") Modified: python/branches/ssize_t/Lib/test/test_mmap.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_mmap.py (original) +++ python/branches/ssize_t/Lib/test/test_mmap.py Mon Jan 2 16:17:17 2006 @@ -356,6 +356,22 @@ finally: os.unlink(TESTFN) + # make move works everywhere (64-bit format problem earlier) + f = open(TESTFN, 'w+') + + try: # unlink TESTFN no matter what + f.write("ABCDEabcde") # Arbitrary character + 
f.flush() + + mf = mmap.mmap(f.fileno(), 10) + mf.move(5, 0, 5) + verify(mf[:] == "ABCDEABCDE", "Map move should have duplicated front 5") + mf.close() + f.close() + + finally: + os.unlink(TESTFN) + print ' Test passed' test_both() Modified: python/branches/ssize_t/Lib/test/test_operator.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_operator.py (original) +++ python/branches/ssize_t/Lib/test/test_operator.py Mon Jan 2 16:17:17 2006 @@ -3,6 +3,34 @@ from test import test_support +class Seq1: + def __init__(self, lst): + self.lst = lst + def __len__(self): + return len(self.lst) + def __getitem__(self, i): + return self.lst[i] + def __add__(self, other): + return self.lst + other.lst + def __mul__(self, other): + return self.lst * other + def __rmul__(self, other): + return other * self.lst + +class Seq2(object): + def __init__(self, lst): + self.lst = lst + def __len__(self): + return len(self.lst) + def __getitem__(self, i): + return self.lst[i] + def __add__(self, other): + return self.lst + other.lst + def __mul__(self, other): + return self.lst * other + def __rmul__(self, other): + return other * self.lst + class OperatorTestCase(unittest.TestCase): def test_lt(self): @@ -92,6 +120,9 @@ self.failUnlessRaises(TypeError, operator.concat, None, None) self.failUnless(operator.concat('py', 'thon') == 'python') self.failUnless(operator.concat([1, 2], [3, 4]) == [1, 2, 3, 4]) + self.failUnless(operator.concat(Seq1([5, 6]), Seq1([7])) == [5, 6, 7]) + self.failUnless(operator.concat(Seq2([5, 6]), Seq2([7])) == [5, 6, 7]) + self.failUnlessRaises(TypeError, operator.concat, 13, 29) def test_countOf(self): self.failUnlessRaises(TypeError, operator.countOf) @@ -246,6 +277,15 @@ self.failUnless(operator.repeat(a, 2) == a+a) self.failUnless(operator.repeat(a, 1) == a) self.failUnless(operator.repeat(a, 0) == '') + a = Seq1([4, 5, 6]) + self.failUnless(operator.repeat(a, 2) == [4, 5, 6, 4, 5, 6]) + 
self.failUnless(operator.repeat(a, 1) == [4, 5, 6]) + self.failUnless(operator.repeat(a, 0) == []) + a = Seq2([4, 5, 6]) + self.failUnless(operator.repeat(a, 2) == [4, 5, 6, 4, 5, 6]) + self.failUnless(operator.repeat(a, 1) == [4, 5, 6]) + self.failUnless(operator.repeat(a, 0) == []) + self.failUnlessRaises(TypeError, operator.repeat, 6, 7) def test_rshift(self): self.failUnlessRaises(TypeError, operator.rshift) @@ -372,6 +412,53 @@ self.assertEqual(operator.itemgetter(2,10,5)(data), ('2', '10', '5')) self.assertRaises(TypeError, operator.itemgetter(2, 'x', 5), data) + def test_inplace(self): + class C(object): + def __iadd__ (self, other): return "iadd" + def __iand__ (self, other): return "iand" + def __idiv__ (self, other): return "idiv" + def __ifloordiv__(self, other): return "ifloordiv" + def __ilshift__ (self, other): return "ilshift" + def __imod__ (self, other): return "imod" + def __imul__ (self, other): return "imul" + def __ior__ (self, other): return "ior" + def __ipow__ (self, other): return "ipow" + def __irshift__ (self, other): return "irshift" + def __isub__ (self, other): return "isub" + def __itruediv__ (self, other): return "itruediv" + def __ixor__ (self, other): return "ixor" + def __getitem__(self, other): return 5 # so that C is a sequence + c = C() + self.assertEqual(operator.iadd (c, 5), "iadd") + self.assertEqual(operator.iand (c, 5), "iand") + self.assertEqual(operator.idiv (c, 5), "idiv") + self.assertEqual(operator.ifloordiv(c, 5), "ifloordiv") + self.assertEqual(operator.ilshift (c, 5), "ilshift") + self.assertEqual(operator.imod (c, 5), "imod") + self.assertEqual(operator.imul (c, 5), "imul") + self.assertEqual(operator.ior (c, 5), "ior") + self.assertEqual(operator.ipow (c, 5), "ipow") + self.assertEqual(operator.irshift (c, 5), "irshift") + self.assertEqual(operator.isub (c, 5), "isub") + self.assertEqual(operator.itruediv (c, 5), "itruediv") + self.assertEqual(operator.ixor (c, 5), "ixor") + self.assertEqual(operator.iconcat (c, 
c), "iadd") + self.assertEqual(operator.irepeat (c, 5), "imul") + self.assertEqual(operator.__iadd__ (c, 5), "iadd") + self.assertEqual(operator.__iand__ (c, 5), "iand") + self.assertEqual(operator.__idiv__ (c, 5), "idiv") + self.assertEqual(operator.__ifloordiv__(c, 5), "ifloordiv") + self.assertEqual(operator.__ilshift__ (c, 5), "ilshift") + self.assertEqual(operator.__imod__ (c, 5), "imod") + self.assertEqual(operator.__imul__ (c, 5), "imul") + self.assertEqual(operator.__ior__ (c, 5), "ior") + self.assertEqual(operator.__ipow__ (c, 5), "ipow") + self.assertEqual(operator.__irshift__ (c, 5), "irshift") + self.assertEqual(operator.__isub__ (c, 5), "isub") + self.assertEqual(operator.__itruediv__ (c, 5), "itruediv") + self.assertEqual(operator.__ixor__ (c, 5), "ixor") + self.assertEqual(operator.__iconcat__ (c, c), "iadd") + self.assertEqual(operator.__irepeat__ (c, 5), "imul") def test_main(verbose=None): import sys Modified: python/branches/ssize_t/Lib/test/test_poll.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_poll.py (original) +++ python/branches/ssize_t/Lib/test/test_poll.py Mon Jan 2 16:17:17 2006 @@ -185,7 +185,7 @@ if x != 5: print 'Overflow must have occurred' print 'Poll test 3 complete' - + test_poll1() test_poll2() Modified: python/branches/ssize_t/Lib/test/test_quopri.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_quopri.py (original) +++ python/branches/ssize_t/Lib/test/test_quopri.py Mon Jan 2 16:17:17 2006 @@ -175,18 +175,18 @@ self.assert_(quopri.decodestring(e, header=True) == p) def test_scriptencode(self): - (p, e) = self.STRINGS[-1] - (cin, cout) = os.popen2("%s -mquopri" % sys.executable) - cin.write(p) - cin.close() - self.assert_(cout.read() == e) + (p, e) = self.STRINGS[-1] + (cin, cout) = os.popen2("%s -mquopri" % sys.executable) + cin.write(p) + cin.close() + self.assert_(cout.read() 
== e) def test_scriptdecode(self): - (p, e) = self.STRINGS[-1] - (cin, cout) = os.popen2("%s -mquopri -d" % sys.executable) - cin.write(e) - cin.close() - self.assert_(cout.read() == p) + (p, e) = self.STRINGS[-1] + (cin, cout) = os.popen2("%s -mquopri -d" % sys.executable) + cin.write(e) + cin.close() + self.assert_(cout.read() == p) def test_main(): test_support.run_unittest(QuopriTestCase) Modified: python/branches/ssize_t/Lib/test/test_xmlrpc.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_xmlrpc.py (original) +++ python/branches/ssize_t/Lib/test/test_xmlrpc.py Mon Jan 2 16:17:17 2006 @@ -78,7 +78,7 @@ def test_bug_1164912 (self): d = xmlrpclib.DateTime() - ((new_d,), dummy) = xmlrpclib.loads(xmlrpclib.dumps((d,), + ((new_d,), dummy) = xmlrpclib.loads(xmlrpclib.dumps((d,), methodresponse=True)) self.assert_(isinstance(new_d.value, str)) Modified: python/branches/ssize_t/Lib/urllib.py ============================================================================== --- python/branches/ssize_t/Lib/urllib.py (original) +++ python/branches/ssize_t/Lib/urllib.py Mon Jan 2 16:17:17 2006 @@ -50,8 +50,13 @@ from rourl2path import url2pathname, pathname2url else: def url2pathname(pathname): + """OS-specific conversion from a relative URL of the 'file' scheme + to a file system path; not recommended for general use.""" return unquote(pathname) + def pathname2url(pathname): + """OS-specific conversion from a file system path to a relative URL + of the 'file' scheme; not recommended for general use.""" return quote(pathname) # This really consists of two pieces: Modified: python/branches/ssize_t/Lib/webbrowser.py ============================================================================== --- python/branches/ssize_t/Lib/webbrowser.py (original) +++ python/branches/ssize_t/Lib/webbrowser.py Mon Jan 2 16:17:17 2006 @@ -96,7 +96,7 @@ if sys.platform[:3] == "win": def _isexecutable(cmd): cmd = 
cmd.lower() - if os.path.isfile(cmd) and (cmd.endswith(".exe") or + if os.path.isfile(cmd) and (cmd.endswith(".exe") or cmd.endswith(".bat")): return True for ext in ".exe", ".bat": @@ -134,7 +134,7 @@ def __init__(self, name=""): self.name = name self.basename = name - + def open(self, url, new=0, autoraise=1): raise NotImplementedError Modified: python/branches/ssize_t/Lib/xmlcore/etree/ElementInclude.py ============================================================================== --- python/branches/ssize_t/Lib/xmlcore/etree/ElementInclude.py (original) +++ python/branches/ssize_t/Lib/xmlcore/etree/ElementInclude.py Mon Jan 2 16:17:17 2006 @@ -141,4 +141,3 @@ else: include(e, loader) i = i + 1 - Modified: python/branches/ssize_t/Lib/xmlcore/etree/ElementPath.py ============================================================================== --- python/branches/ssize_t/Lib/xmlcore/etree/ElementPath.py (original) +++ python/branches/ssize_t/Lib/xmlcore/etree/ElementPath.py Mon Jan 2 16:17:17 2006 @@ -196,4 +196,3 @@ def findall(element, path): return _compile(path).findall(element) - Modified: python/branches/ssize_t/Makefile.pre.in ============================================================================== --- python/branches/ssize_t/Makefile.pre.in (original) +++ python/branches/ssize_t/Makefile.pre.in Mon Jan 2 16:17:17 2006 @@ -254,6 +254,7 @@ Python/modsupport.o \ Python/mystrtoul.o \ Python/mysnprintf.o \ + Python/pyarena.o \ Python/pyfpe.o \ Python/pystate.o \ Python/pythonrun.o \ @@ -348,7 +349,9 @@ $(SIGNAL_OBJS) \ $(MODOBJS) \ $(srcdir)/Modules/getbuildinfo.c - if test -f buildno; then \ + if test -d $(srcdir)/.svn; then \ + svnversion $(srcdir) >buildno; \ + elif test -f buildno; then \ expr `cat buildno` + 1 >buildno1; \ mv -f buildno1 buildno; \ else echo 1 >buildno; fi @@ -443,7 +446,7 @@ # Special rules for object files Modules/getbuildinfo.o: $(srcdir)/Modules/getbuildinfo.c buildno - $(CC) -c $(PY_CFLAGS) -DBUILD=`cat buildno` -o $@ 
$(srcdir)/Modules/getbuildinfo.c + $(CC) -c $(PY_CFLAGS) -DBUILD=\"`cat buildno`\" -o $@ $(srcdir)/Modules/getbuildinfo.c Modules/getpath.o: $(srcdir)/Modules/getpath.c Makefile $(CC) -c $(PY_CFLAGS) -DPYTHONPATH='"$(PYTHONPATH)"' \ @@ -520,6 +523,7 @@ Include/object.h \ Include/objimpl.h \ Include/patchlevel.h \ + Include/pyarena.h \ Include/pydebug.h \ Include/pyerrors.h \ Include/pyfpe.h \ Modified: python/branches/ssize_t/Misc/ACKS ============================================================================== --- python/branches/ssize_t/Misc/ACKS (original) +++ python/branches/ssize_t/Misc/ACKS Mon Jan 2 16:17:17 2006 @@ -357,6 +357,7 @@ Christopher Lee Inyeol Lee John J. Lee +Thomas Lee Luc Lefebvre Kip Lehman Joerg Lehmann @@ -607,6 +608,7 @@ Bill Tutt Doobee R. Tzeck Lionel Ulmer +Michael Urman Hector Urtubia Dmitry Vasiliev Frank Vercruesse Modified: python/branches/ssize_t/Misc/NEWS ============================================================================== --- python/branches/ssize_t/Misc/NEWS (original) +++ python/branches/ssize_t/Misc/NEWS Mon Jan 2 16:17:17 2006 @@ -12,6 +12,26 @@ Core and builtins ----------------- +- Bug #959576: The pwd module is now builtin. This allows Python to be + built on UNIX platforms without $HOME set. + +- Bug #1072182, fix some potential problems if characters are signed. + +- Bug #889500, fix line number on SyntaxWarning for global declarations. + +- Bug #1378022, UTF-8 files with a leading BOM crashed the interpreter. + +- Support for converting hex strings to floats no longer works. + This was not portable. float('0x3') now raises a ValueError. + +- Patch #1382163: Expose Subversion revision number to Python. New C API + function Py_GetBuildNumber(). New attribute sys.build_number. Build number + is now displayed in interactive prompt banner. + +- Implementation of PEP 341 - Unification of try/except and try/finally. 
+ "except" clauses can now be written together with a "finally" clause in + one try statement instead of two nested ones. Patch #1355913. + - Bug #1379994: Builtin unicode_escape and raw_unicode_escape codec now encodes backslash correctly. @@ -189,6 +209,8 @@ Extension Modules ----------------- +- Patch #1365916: fix some unsafe 64-bit mmap methods. + - Bug #1290333: Added a workaround for cjkcodecs' _codecs_cn build problem on AIX. @@ -295,6 +317,12 @@ Library ------- +- Patch #1157027: cookielib mishandles RFC 2109 cookies in Netscape mode + +- Patch #1117398: cookielib.LWPCookieJar and .MozillaCookieJar now raise + LoadError as documented, instead of IOError. For compatibility, + LoadError subclasses IOError. + - Added the hashlib module. It provides secure hash functions for MD5 and SHA1, 224, 256, 384, and 512. Note that recent developments make the historic MD5 and SHA1 unsuitable for cryptographic-strength applications. @@ -565,8 +593,9 @@ once when a size argument is given. This prevents a buffer overflow in the tokenizer with very long source lines. -- Bug #1083110: ``zlib.decompress.flush()`` would segfault if called immediately - after creating the object, without any intervening ``.decompress()`` calls. +- Bug #1083110: ``zlib.decompress.flush()`` would segfault if called + immediately after creating the object, without any intervening + ``.decompress()`` calls. - The reconvert.quote function can now emit triple-quoted strings. The reconvert module now has some simple documentation. @@ -602,6 +631,8 @@ Build ----- +- Use -xcode=pic32 for CCSHARED on Solaris with SunPro. + - Bug #1189330: configure did not correctly determine the necessary value of LINKCC if python was built with GCC 4.0. @@ -709,6 +740,8 @@ - Patch #1177597: Correct Complex.__init__. +- Fixed a display glitch in Pynche, which could cause the right arrow to + wiggle over by a pixel. What's New in Python 2.4 final? 
=============================== Modified: python/branches/ssize_t/Modules/Setup.dist ============================================================================== --- python/branches/ssize_t/Modules/Setup.dist (original) +++ python/branches/ssize_t/Modules/Setup.dist Mon Jan 2 16:17:17 2006 @@ -111,6 +111,8 @@ posix posixmodule.c # posix (UNIX) system calls errno errnomodule.c # posix (UNIX) errno values +pwd pwdmodule.c # this is needed to find out the user's home dir + # if $HOME is not set _sre _sre.c # Fredrik Lundh's new regular expressions _codecs _codecsmodule.c # access to the builtin codecs and codec registry @@ -186,7 +188,6 @@ # supported...) #fcntl fcntlmodule.c # fcntl(2) and ioctl(2) -#pwd pwdmodule.c # pwd(3) #spwd spwdmodule.c # spwd(3) #grp grpmodule.c # grp(3) #select selectmodule.c # select(2); not on ancient System V Modified: python/branches/ssize_t/Modules/_elementtree.c ============================================================================== --- python/branches/ssize_t/Modules/_elementtree.c (original) +++ python/branches/ssize_t/Modules/_elementtree.c Mon Jan 2 16:17:17 2006 @@ -1508,10 +1508,12 @@ if (self->data) { if (self->this == self->last) { + Py_DECREF(self->last->text); self->last->text = JOIN_SET( self->data, PyList_CheckExact(self->data) ); } else { + Py_DECREF(self->last->tail); self->last->tail = JOIN_SET( self->data, PyList_CheckExact(self->data) ); @@ -1613,10 +1615,12 @@ if (self->data) { if (self->this == self->last) { + Py_DECREF(self->last->text); self->last->text = JOIN_SET( self->data, PyList_CheckExact(self->data) ); } else { + Py_DECREF(self->last->tail); self->last->tail = JOIN_SET( self->data, PyList_CheckExact(self->data) ); @@ -2149,7 +2153,7 @@ for (i = 0; i < 256; i++) s[i] = i; - u = PyUnicode_Decode((char*)s, 256, name, "replace"); + u = PyUnicode_Decode((char*) s, 256, name, "replace"); if (!u) return XML_STATUS_ERROR; Modified: python/branches/ssize_t/Modules/_hotshot.c 
============================================================================== --- python/branches/ssize_t/Modules/_hotshot.c (original) +++ python/branches/ssize_t/Modules/_hotshot.c Mon Jan 2 16:17:17 2006 @@ -1397,7 +1397,7 @@ char *buffer; int i = 0; - while (*rev && !isdigit((int)*rev)) + while (*rev && !isdigit(Py_CHARMASK(*rev))) ++rev; while (rev[i] != ' ' && rev[i] != '\0') ++i; Modified: python/branches/ssize_t/Modules/_tkinter.c ============================================================================== --- python/branches/ssize_t/Modules/_tkinter.c (original) +++ python/branches/ssize_t/Modules/_tkinter.c Mon Jan 2 16:17:17 2006 @@ -636,7 +636,7 @@ } strcpy(argv0, className); - if (isupper((int)(argv0[0]))) + if (isupper(Py_CHARMASK(argv0[0]))) argv0[0] = tolower(argv0[0]); Tcl_SetVar(v->interp, "argv0", argv0, TCL_GLOBAL_ONLY); ckfree(argv0); Modified: python/branches/ssize_t/Modules/getbuildinfo.c ============================================================================== --- python/branches/ssize_t/Modules/getbuildinfo.c (original) +++ python/branches/ssize_t/Modules/getbuildinfo.c Mon Jan 2 16:17:17 2006 @@ -21,7 +21,7 @@ #endif #ifndef BUILD -#define BUILD 0 +#define BUILD "0" #endif const char * @@ -29,6 +29,12 @@ { static char buildinfo[50]; PyOS_snprintf(buildinfo, sizeof(buildinfo), - "#%d, %.20s, %.9s", BUILD, DATE, TIME); + "%s, %.20s, %.9s", BUILD, DATE, TIME); return buildinfo; } + +const char * +Py_GetBuildNumber(void) +{ + return BUILD; +} Modified: python/branches/ssize_t/Modules/mmapmodule.c ============================================================================== --- python/branches/ssize_t/Modules/mmapmodule.c (original) +++ python/branches/ssize_t/Modules/mmapmodule.c Mon Jan 2 16:17:17 2006 @@ -374,7 +374,7 @@ { unsigned long new_size; CHECK_VALID(NULL); - if (!PyArg_ParseTuple (args, "l:resize", &new_size) || + if (!PyArg_ParseTuple (args, "k:resize", &new_size) || !is_resizeable(self)) { return NULL; #ifdef MS_WINDOWS 
@@ -463,10 +463,10 @@ static PyObject * mmap_flush_method(mmap_object *self, PyObject *args) { - size_t offset = 0; - size_t size = self->size; + unsigned long offset = 0; + unsigned long size = self->size; CHECK_VALID(NULL); - if (!PyArg_ParseTuple (args, "|ll:flush", &offset, &size)) { + if (!PyArg_ParseTuple (args, "|kk:flush", &offset, &size)) { return NULL; } else if ((offset + size) > self->size) { PyErr_SetString (PyExc_ValueError, @@ -539,7 +539,7 @@ { unsigned long dest, src, count; CHECK_VALID(NULL); - if (!PyArg_ParseTuple (args, "iii:move", &dest, &src, &count) || + if (!PyArg_ParseTuple (args, "kkk:move", &dest, &src, &count) || !is_writeable(self)) { return NULL; } else { @@ -863,7 +863,7 @@ PyObject *map_size_obj = NULL; int map_size; int fd, flags = MAP_SHARED, prot = PROT_WRITE | PROT_READ; - access_mode access = ACCESS_DEFAULT; + int access = (int)ACCESS_DEFAULT; static const char *keywords[] = {"fileno", "length", "flags", "prot", "access", NULL}; @@ -876,11 +876,11 @@ if (map_size < 0) return NULL; - if ((access != ACCESS_DEFAULT) && + if ((access != (int)ACCESS_DEFAULT) && ((flags != MAP_SHARED) || ( prot != (PROT_WRITE | PROT_READ)))) return PyErr_Format(PyExc_ValueError, "mmap can't specify both access and flags, prot."); - switch(access) { + switch((access_mode)access) { case ACCESS_READ: flags = MAP_SHARED; prot = PROT_READ; @@ -935,7 +935,7 @@ PyErr_SetFromErrno(mmap_module_error); return NULL; } - m_obj->access = access; + m_obj->access = (access_mode)access; return (PyObject *)m_obj; } #endif /* UNIX */ @@ -951,7 +951,7 @@ DWORD dwErr = 0; int fileno; HANDLE fh = 0; - access_mode access = ACCESS_DEFAULT; + int access = (access_mode)ACCESS_DEFAULT; DWORD flProtect, dwDesiredAccess; static const char *keywords[] = { "fileno", "length", "tagname", @@ -963,7 +963,7 @@ return NULL; } - switch(access) { + switch((access_mode)access) { case ACCESS_READ: flProtect = PAGE_READONLY; dwDesiredAccess = FILE_MAP_READ; @@ -1048,7 +1048,7 @@ else 
m_obj->tagname = NULL; - m_obj->access = access; + m_obj->access = (access_mode)access; m_obj->map_handle = CreateFileMapping (m_obj->file_handle, NULL, flProtect, Modified: python/branches/ssize_t/Modules/operator.c ============================================================================== --- python/branches/ssize_t/Modules/operator.c (original) +++ python/branches/ssize_t/Modules/operator.c Mon Jan 2 16:17:17 2006 @@ -80,9 +80,23 @@ spam2(op_and_ , PyNumber_And) spam2(op_xor , PyNumber_Xor) spam2(op_or_ , PyNumber_Or) +spam2(op_iadd , PyNumber_InPlaceAdd) +spam2(op_isub , PyNumber_InPlaceSubtract) +spam2(op_imul , PyNumber_InPlaceMultiply) +spam2(op_idiv , PyNumber_InPlaceDivide) +spam2(op_ifloordiv , PyNumber_InPlaceFloorDivide) +spam2(op_itruediv , PyNumber_InPlaceTrueDivide) +spam2(op_imod , PyNumber_InPlaceRemainder) +spam2(op_ilshift , PyNumber_InPlaceLshift) +spam2(op_irshift , PyNumber_InPlaceRshift) +spam2(op_iand , PyNumber_InPlaceAnd) +spam2(op_ixor , PyNumber_InPlaceXor) +spam2(op_ior , PyNumber_InPlaceOr) spami(isSequenceType , PySequence_Check) spam2(op_concat , PySequence_Concat) spamoi(op_repeat , PySequence_Repeat) +spam2(op_iconcat , PySequence_InPlaceConcat) +spamoi(op_irepeat , PySequence_InPlaceRepeat) spami2b(op_contains , PySequence_Contains) spami2b(sequenceIncludes, PySequence_Contains) spami2(indexOf , PySequence_Index) @@ -108,6 +122,15 @@ } static PyObject* +op_ipow(PyObject *s, PyObject *a) +{ + PyObject *a1, *a2; + if (PyArg_UnpackTuple(a,"ipow", 2, 2, &a1, &a2)) + return PyNumber_InPlacePower(a1, a2, Py_None); + return NULL; +} + +static PyObject* is_(PyObject *s, PyObject *a) { PyObject *a1, *a2, *result = NULL; @@ -177,10 +200,10 @@ #undef spam1o #undef spam1o #define spam1(OP,DOC) {#OP, OP, METH_VARARGS, PyDoc_STR(DOC)}, -#define spam2(OP,ALTOP,DOC) {#OP, op_##OP, METH_VARARGS, DOC}, \ +#define spam2(OP,ALTOP,DOC) {#OP, op_##OP, METH_VARARGS, PyDoc_STR(DOC)}, \ {#ALTOP, op_##OP, METH_VARARGS, PyDoc_STR(DOC)}, #define 
spam1o(OP,DOC) {#OP, OP, METH_O, PyDoc_STR(DOC)}, -#define spam2o(OP,ALTOP,DOC) {#OP, op_##OP, METH_O, DOC}, \ +#define spam2o(OP,ALTOP,DOC) {#OP, op_##OP, METH_O, PyDoc_STR(DOC)}, \ {#ALTOP, op_##OP, METH_O, PyDoc_STR(DOC)}, static struct PyMethodDef operator_methods[] = { @@ -224,17 +247,34 @@ spam2(and_,__and__, "and_(a, b) -- Same as a & b.") spam2(xor,__xor__, "xor(a, b) -- Same as a ^ b.") spam2(or_,__or__, "or_(a, b) -- Same as a | b.") +spam2(iadd,__iadd__, "iadd(a, b) -- Same as a += b.") +spam2(isub,__isub__, "isub(a, b) -- Same as a -= b.") +spam2(imul,__imul__, "imul(a, b) -- Same as a *= b.") +spam2(idiv,__idiv__, "idiv(a, b) -- Same as a /= b when __future__.division is not in effect.") +spam2(ifloordiv,__ifloordiv__, "ifloordiv(a, b) -- Same as a //= b.") +spam2(itruediv,__itruediv__, "itruediv(a, b) -- Same as a /= b when __future__.division is in effect.") +spam2(imod,__imod__, "imod(a, b) -- Same as a %= b.") +spam2(ilshift,__ilshift__, "ilshift(a, b) -- Same as a <<= b.") +spam2(irshift,__irshift__, "irshift(a, b) -- Same as a >>= b.") +spam2(iand,__iand__, "iand(a, b) -- Same as a &= b.") +spam2(ixor,__ixor__, "ixor(a, b) -- Same as a ^= b.") +spam2(ior,__ior__, "ior(a, b) -- Same as a |= b.") spam2(concat,__concat__, "concat(a, b) -- Same as a + b, for a and b sequences.") spam2(repeat,__repeat__, "repeat(a, b) -- Return a * b, where a is a sequence, and b is an integer.") +spam2(iconcat,__iconcat__, + "iconcat(a, b) -- Same as a += b, for a and b sequences.") +spam2(irepeat,__irepeat__, + "irepeat(a, b) -- Same as a *= b, where a is a sequence, and b is an integer.") spam2(getitem,__getitem__, "getitem(a, b) -- Same as a[b].") spam2(setitem,__setitem__, "setitem(a, b, c) -- Same as a[b] = c.") spam2(delitem,__delitem__, "delitem(a, b) -- Same as del a[b].") -spam2(pow,__pow__, "pow(a, b) -- Same as a**b.") +spam2(pow,__pow__, "pow(a, b) -- Same as a ** b.") +spam2(ipow,__ipow__, "ipow(a, b) -- Same as a **= b.") spam2(getslice,__getslice__, 
"getslice(a, b, c) -- Same as a[b:c].") spam2(setslice,__setslice__, Modified: python/branches/ssize_t/Modules/posixmodule.c ============================================================================== --- python/branches/ssize_t/Modules/posixmodule.c (original) +++ python/branches/ssize_t/Modules/posixmodule.c Mon Jan 2 16:17:17 2006 @@ -463,7 +463,7 @@ if (strlen(msgbuf) > 0) { /* If Non-Empty Msg, Trim CRLF */ char *lastc = &msgbuf[ strlen(msgbuf)-1 ]; - while (lastc > msgbuf && isspace(*lastc)) + while (lastc > msgbuf && isspace(Py_CHARMASK(*lastc))) *lastc-- = '\0'; /* Trim Trailing Whitespace (CRLF) */ } Modified: python/branches/ssize_t/Modules/pyexpat.c ============================================================================== --- python/branches/ssize_t/Modules/pyexpat.c (original) +++ python/branches/ssize_t/Modules/pyexpat.c Mon Jan 2 16:17:17 2006 @@ -1803,7 +1803,7 @@ char *rev = rcsid; int i = 0; - while (!isdigit((int)*rev)) + while (!isdigit(Py_CHARMASK(*rev))) ++rev; while (rev[i] != ' ' && rev[i] != '\0') ++i; Modified: python/branches/ssize_t/Modules/socketmodule.c ============================================================================== --- python/branches/ssize_t/Modules/socketmodule.c (original) +++ python/branches/ssize_t/Modules/socketmodule.c Mon Jan 2 16:17:17 2006 @@ -506,7 +506,8 @@ if (strlen(outbuf) > 0) { /* If non-empty msg, trim CRLF */ char *lastc = &outbuf[ strlen(outbuf)-1 ]; - while (lastc > outbuf && isspace(*lastc)) { + while (lastc > outbuf && + isspace(Py_CHARMASK(*lastc))) { /* Trim trailing whitespace (CRLF) */ *lastc-- = '\0'; } Modified: python/branches/ssize_t/Modules/stropmodule.c ============================================================================== --- python/branches/ssize_t/Modules/stropmodule.c (original) +++ python/branches/ssize_t/Modules/stropmodule.c Mon Jan 2 16:17:17 2006 @@ -757,7 +757,7 @@ x = (long) PyOS_strtoul(s, &end, base); else x = PyOS_strtol(s, &end, base); - if (end == s || 
!isalnum((int)end[-1])) + if (end == s || !isalnum(Py_CHARMASK(end[-1]))) goto bad; while (*end && isspace(Py_CHARMASK(*end))) end++; Modified: python/branches/ssize_t/Objects/abstract.c ============================================================================== --- python/branches/ssize_t/Objects/abstract.c (original) +++ python/branches/ssize_t/Objects/abstract.c Mon Jan 2 16:17:17 2006 @@ -635,14 +635,11 @@ PyObject *result = binary_op1(v, w, NB_SLOT(nb_add)); if (result == Py_NotImplemented) { PySequenceMethods *m = v->ob_type->tp_as_sequence; + Py_DECREF(result); if (m && m->sq_concat) { - Py_DECREF(result); - result = (*m->sq_concat)(v, w); + return (*m->sq_concat)(v, w); } - if (result == Py_NotImplemented) { - Py_DECREF(result); - return binop_type_error(v, w, "+"); - } + result = binop_type_error(v, w, "+"); } return result; } @@ -1144,6 +1141,15 @@ if (m && m->sq_concat) return m->sq_concat(s, o); + /* Instances of user classes defining an __add__() method only + have an nb_add slot, not an sq_concat slot. So we fall back + to nb_add if both arguments appear to be sequences. */ + if (PySequence_Check(s) && PySequence_Check(o)) { + PyObject *result = binary_op1(s, o, NB_SLOT(nb_add)); + if (result != Py_NotImplemented) + return result; + Py_DECREF(result); + } return type_error("object can't be concatenated"); } @@ -1159,6 +1165,20 @@ if (m && m->sq_repeat) return m->sq_repeat(o, count); + /* Instances of user classes defining a __mul__() method only + have an nb_multiply slot, not an sq_repeat slot. so we fall back + to nb_multiply if o appears to be a sequence. 
*/ + if (PySequence_Check(o)) { + PyObject *n, *result; + n = PyInt_FromLong(count); + if (n == NULL) + return NULL; + result = binary_op1(o, n, NB_SLOT(nb_multiply)); + Py_DECREF(n); + if (result != Py_NotImplemented) + return result; + Py_DECREF(result); + } return type_error("object can't be repeated"); } @@ -1176,6 +1196,13 @@ if (m && m->sq_concat) return m->sq_concat(s, o); + if (PySequence_Check(s) && PySequence_Check(o)) { + PyObject *result = binary_iop1(s, o, NB_SLOT(nb_inplace_add), + NB_SLOT(nb_add)); + if (result != Py_NotImplemented) + return result; + Py_DECREF(result); + } return type_error("object can't be concatenated"); } @@ -1193,6 +1220,18 @@ if (m && m->sq_repeat) return m->sq_repeat(o, count); + if (PySequence_Check(o)) { + PyObject *n, *result; + n = PyInt_FromLong(count); + if (n == NULL) + return NULL; + result = binary_iop1(o, n, NB_SLOT(nb_inplace_multiply), + NB_SLOT(nb_multiply)); + Py_DECREF(n); + if (result != Py_NotImplemented) + return result; + Py_DECREF(result); + } return type_error("object can't be repeated"); } Modified: python/branches/ssize_t/Objects/bufferobject.c ============================================================================== --- python/branches/ssize_t/Objects/bufferobject.c (original) +++ python/branches/ssize_t/Objects/bufferobject.c Mon Jan 2 16:17:17 2006 @@ -356,6 +356,8 @@ return NULL; ob = PyString_FromStringAndSize(NULL, size + count); + if ( ob == NULL ) + return NULL; p = PyString_AS_STRING(ob); memcpy(p, ptr1, size); memcpy(p + size, ptr2, count); Modified: python/branches/ssize_t/Objects/dictobject.c ============================================================================== --- python/branches/ssize_t/Objects/dictobject.c (original) +++ python/branches/ssize_t/Objects/dictobject.c Mon Jan 2 16:17:17 2006 @@ -2,7 +2,7 @@ /* Dictionary object implementation using a hash table */ /* The distribution includes a separate file, Objects/dictnotes.txt, - describing explorations into dictionary 
design and optimization. + describing explorations into dictionary design and optimization. It covers typical dictionary use patterns, the parameters for tuning dictionaries, and several ideas for possible optimizations. */ @@ -519,10 +519,10 @@ } /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the - * dictionary if it is merely replacing the value for an existing key. - * This is means that it's safe to loop over a dictionary with - * PyDict_Next() and occasionally replace a value -- but you can't - * insert new keys or remove them. + * dictionary if it's merely replacing the value for an existing key. + * This means that it's safe to loop over a dictionary with PyDict_Next() + * and occasionally replace a value -- but you can't insert new keys or + * remove them. */ int PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) @@ -554,15 +554,15 @@ /* If we added a key, we can safely resize. Otherwise just return! * If fill >= 2/3 size, adjust size. Normally, this doubles or * quaduples the size, but it's also possible for the dict to shrink - * (if ma_fill is much larger than ma_used, meaning a lot of dict + * (if ma_fill is much larger than ma_used, meaning a lot of dict * keys have been * deleted). - * + * * Quadrupling the size improves average dictionary sparseness * (reducing collisions) at the cost of some memory and iteration * speed (which loops over every possible entry). It also halves * the number of expensive resize operations in a growing dictionary. - * - * Very large dictionaries (over 50K items) use doubling instead. + * + * Very large dictionaries (over 50K items) use doubling instead. * This may help applications with severe memory constraints. 
*/ if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2)) @@ -734,7 +734,7 @@ PyMem_DEL(mp->ma_table); if (num_free_dicts < MAXFREEDICTS && mp->ob_type == &PyDict_Type) free_dicts[num_free_dicts++] = mp; - else + else mp->ob_type->tp_free((PyObject *)mp); Py_TRASHCAN_SAFE_END(mp) } @@ -2251,7 +2251,7 @@ Py_DECREF(PyTuple_GET_ITEM(result, 1)); } else { result = PyTuple_New(2); - if (result == NULL) + if (result == NULL) return NULL; } di->len--; Modified: python/branches/ssize_t/Objects/typeobject.c ============================================================================== --- python/branches/ssize_t/Objects/typeobject.c (original) +++ python/branches/ssize_t/Objects/typeobject.c Mon Jan 2 16:17:17 2006 @@ -1288,12 +1288,14 @@ mro_internal(PyTypeObject *type) { PyObject *mro, *result, *tuple; + int checkit = 0; if (type->ob_type == &PyType_Type) { result = mro_implementation(type); } else { static PyObject *mro_str; + checkit = 1; mro = lookup_method((PyObject *)type, "mro", &mro_str); if (mro == NULL) return -1; @@ -1304,6 +1306,39 @@ return -1; tuple = PySequence_Tuple(result); Py_DECREF(result); + if (tuple == NULL) + return -1; + if (checkit) { + int i, len; + PyObject *cls; + PyTypeObject *solid; + + solid = solid_base(type); + + len = PyTuple_GET_SIZE(tuple); + + for (i = 0; i < len; i++) { + PyTypeObject *t; + cls = PyTuple_GET_ITEM(tuple, i); + if (PyClass_Check(cls)) + continue; + else if (!PyType_Check(cls)) { + PyErr_Format(PyExc_TypeError, + "mro() returned a non-class ('%.500s')", + cls->ob_type->tp_name); + Py_DECREF(tuple); + return -1; + } + t = (PyTypeObject*)cls; + if (!PyType_IsSubtype(solid, solid_base(t))) { + PyErr_Format(PyExc_TypeError, + "mro() returned base with unsuitable layout ('%.500s')", + t->tp_name); + Py_DECREF(tuple); + return -1; + } + } + } type->tp_mro = tuple; return 0; } @@ -4096,9 +4131,6 @@ return len; } -SLOT1(slot_sq_concat, "__add__", PyObject *, "O") -SLOT1(slot_sq_repeat, "__mul__", int, "i") - /* 
Super-optimized version of slot_sq_item. Other slots could do the same... */ static PyObject * @@ -4212,9 +4244,6 @@ return result; } -SLOT1(slot_sq_inplace_concat, "__iadd__", PyObject *, "O") -SLOT1(slot_sq_inplace_repeat, "__imul__", int, "i") - #define slot_mp_length slot_sq_length SLOT1(slot_mp_subscript, "__getitem__", PyObject *, "O") @@ -4927,12 +4956,17 @@ static slotdef slotdefs[] = { SQSLOT("__len__", sq_length, slot_sq_length, wrap_inquiry, "x.__len__() <==> len(x)"), - SQSLOT("__add__", sq_concat, slot_sq_concat, wrap_binaryfunc, - "x.__add__(y) <==> x+y"), - SQSLOT("__mul__", sq_repeat, slot_sq_repeat, wrap_intargfunc, - "x.__mul__(n) <==> x*n"), - SQSLOT("__rmul__", sq_repeat, slot_sq_repeat, wrap_intargfunc, - "x.__rmul__(n) <==> n*x"), + /* Heap types defining __add__/__mul__ have sq_concat/sq_repeat == NULL. + The logic in abstract.c always falls back to nb_add/nb_multiply in + this case. Defining both the nb_* and the sq_* slots to call the + user-defined methods has unexpected side-effects, as shown by + test_descr.notimplemented() */ + SQSLOT("__add__", sq_concat, NULL, wrap_binaryfunc, + "x.__add__(y) <==> x+y"), + SQSLOT("__mul__", sq_repeat, NULL, wrap_intargfunc, + "x.__mul__(n) <==> x*n"), + SQSLOT("__rmul__", sq_repeat, NULL, wrap_intargfunc, + "x.__rmul__(n) <==> n*x"), SQSLOT("__getitem__", sq_item, slot_sq_item, wrap_sq_item, "x.__getitem__(y) <==> x[y]"), SQSLOT("__getslice__", sq_slice, slot_sq_slice, wrap_intintargfunc, @@ -4954,10 +4988,10 @@ Use of negative indices is not supported."), SQSLOT("__contains__", sq_contains, slot_sq_contains, wrap_objobjproc, "x.__contains__(y) <==> y in x"), - SQSLOT("__iadd__", sq_inplace_concat, slot_sq_inplace_concat, - wrap_binaryfunc, "x.__iadd__(y) <==> x+=y"), - SQSLOT("__imul__", sq_inplace_repeat, slot_sq_inplace_repeat, - wrap_intargfunc, "x.__imul__(y) <==> x*=y"), + SQSLOT("__iadd__", sq_inplace_concat, NULL, + wrap_binaryfunc, "x.__iadd__(y) <==> x+=y"), + SQSLOT("__imul__", 
sq_inplace_repeat, NULL, + wrap_intargfunc, "x.__imul__(y) <==> x*=y"), MPSLOT("__len__", mp_length, slot_mp_length, wrap_inquiry, "x.__len__() <==> len(x)"), Modified: python/branches/ssize_t/PC/_subprocess.c ============================================================================== --- python/branches/ssize_t/PC/_subprocess.c (original) +++ python/branches/ssize_t/PC/_subprocess.c Mon Jan 2 16:17:17 2006 @@ -425,6 +425,26 @@ } static PyObject * +sp_TerminateProcess(PyObject* self, PyObject* args) +{ + BOOL result; + + long process; + int exit_code; + if (! PyArg_ParseTuple(args, "li:TerminateProcess", &process, + &exit_code)) + return NULL; + + result = TerminateProcess((HANDLE) process, exit_code); + + if (! result) + return PyErr_SetFromWindowsErr(GetLastError()); + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * sp_GetExitCodeProcess(PyObject* self, PyObject* args) { DWORD exit_code; @@ -498,6 +518,7 @@ {"DuplicateHandle", sp_DuplicateHandle, METH_VARARGS}, {"CreatePipe", sp_CreatePipe, METH_VARARGS}, {"CreateProcess", sp_CreateProcess, METH_VARARGS}, + {"TerminateProcess", sp_TerminateProcess, METH_VARARGS}, {"GetExitCodeProcess", sp_GetExitCodeProcess, METH_VARARGS}, {"WaitForSingleObject", sp_WaitForSingleObject, METH_VARARGS}, {"GetVersion", sp_GetVersion, METH_VARARGS}, Modified: python/branches/ssize_t/PC/_winreg.c ============================================================================== --- python/branches/ssize_t/PC/_winreg.c (original) +++ python/branches/ssize_t/PC/_winreg.c Mon Jan 2 16:17:17 2006 @@ -455,7 +455,7 @@ /* fwd declare __getattr__ */ -static PyObject *PyHKEY_getattr(PyObject *self, char *name); +static PyObject *PyHKEY_getattr(PyObject *self, const char *name); /* The type itself */ PyTypeObject PyHKEY_Type = @@ -526,7 +526,7 @@ }; /*static*/ PyObject * -PyHKEY_getattr(PyObject *self, char *name) +PyHKEY_getattr(PyObject *self, const char *name) { PyObject *res; Modified: 
python/branches/ssize_t/PCbuild/pcbuild.sln ============================================================================== Binary files. No diff available. Modified: python/branches/ssize_t/PCbuild/pythoncore.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/pythoncore.vcproj (original) +++ python/branches/ssize_t/PCbuild/pythoncore.vcproj Mon Jan 2 16:17:17 2006 @@ -1428,7 +1428,7 @@ Name="VCCLCompilerTool" Optimization="2" AdditionalIncludeDirectories="" - PreprocessorDefinitions="BUILD=60"/> + PreprocessorDefinitions="BUILD=\"60\""/> @@ -1436,7 +1436,7 @@ Name="VCCLCompilerTool" Optimization="0" AdditionalIncludeDirectories="" - PreprocessorDefinitions="BUILD=60"/> + PreprocessorDefinitions="BUILD=\"60\""/> @@ -1444,7 +1444,7 @@ Name="VCCLCompilerTool" Optimization="2" AdditionalIncludeDirectories="" - PreprocessorDefinitions="BUILD=60"/> + PreprocessorDefinitions="BUILD=\"60\""/> - - @@ -2516,6 +2513,9 @@ + + @@ -3224,7 +3224,31 @@ + RelativePath="..\Parser\tokenizer.c"> + + + + + + + + + Modified: python/branches/ssize_t/Parser/asdl.py ============================================================================== --- python/branches/ssize_t/Parser/asdl.py (original) +++ python/branches/ssize_t/Parser/asdl.py Mon Jan 2 16:17:17 2006 @@ -142,7 +142,7 @@ def p_product(self, (_0, fields, _1)): " product ::= ( fields ) " # XXX can't I just construct things in the right order? - fields.reverse() + fields.reverse() return Product(fields) def p_sum_0(self, (constructor,)): @@ -164,7 +164,7 @@ def p_constructor_1(self, (id, _0, fields, _1)): " constructor ::= Id ( fields ) " # XXX can't I just construct things in the right order? 
- fields.reverse() + fields.reverse() return Constructor(id, fields) def p_fields_0(self, (field,)): @@ -355,7 +355,7 @@ v.errors += 1 uses = ", ".join(v.types[t]) print "Undefined type %s, used in %s" % (t, uses) - + return not v.errors def parse(file): @@ -380,7 +380,7 @@ else: testdir = "tests" files = glob.glob(testdir + "/*.asdl") - + for file in files: print file mod = parse(file) Modified: python/branches/ssize_t/Parser/asdl_c.py ============================================================================== --- python/branches/ssize_t/Parser/asdl_c.py (original) +++ python/branches/ssize_t/Parser/asdl_c.py Mon Jan 2 16:17:17 2006 @@ -249,8 +249,9 @@ if args: argstr = ", ".join(["%s %s" % (atype, aname) for atype, aname, opt in args]) + argstr += ", PyArena *arena" else: - argstr = "void" + argstr = "PyArena *arena" self.emit("%s %s(%s);" % (ctype, name, argstr), 0) def visitProduct(self, prod, name): @@ -265,6 +266,10 @@ self.emit(s, depth, reflow) argstr = ", ".join(["%s %s" % (atype, aname) for atype, aname, opt in args + attrs]) + if argstr: + argstr += ", PyArena *arena" + else: + argstr = "PyArena *arena" self.emit("%s" % ctype, 0) emit("%s(%s)" % (name, argstr)) emit("{") @@ -280,7 +285,7 @@ emit('return NULL;', 2) emit('}', 1) - emit("p = (%s)malloc(sizeof(*p));" % ctype, 1) + emit("p = (%s)PyArena_Malloc(arena, sizeof(*p));" % ctype, 1); emit("if (!p) {", 1) emit("PyErr_NoMemory();", 2) emit("return NULL;", 2) @@ -434,7 +439,7 @@ self.emit("", 0) self.emit("free(o);", 1) self.func_end() - + def visitConstructor(self, cons, enum, name): self.emit("case %s_kind:" % cons.name, 1) for f in cons.fields: @@ -482,76 +487,76 @@ else: ctype = get_c_type(field.type) self.emit("free_%s((%s)%s);" % (field.type, ctype, value), depth) - + class MarshalUtilVisitor(StaticVisitor): CODE = ''' #define CHECKSIZE(BUF, OFF, MIN) { \\ - int need = *(OFF) + MIN; \\ - if (need >= PyString_GET_SIZE(*(BUF))) { \\ - int newsize = PyString_GET_SIZE(*(BUF)) * 2; \\ - if (newsize 
< need) \\ - newsize = need; \\ - if (_PyString_Resize((BUF), newsize) < 0) \\ - return 0; \\ - } \\ -} + int need = *(OFF) + MIN; \\ + if (need >= PyString_GET_SIZE(*(BUF))) { \\ + int newsize = PyString_GET_SIZE(*(BUF)) * 2; \\ + if (newsize < need) \\ + newsize = need; \\ + if (_PyString_Resize((BUF), newsize) < 0) \\ + return 0; \\ + } \\ +} -static int +static int marshal_write_int(PyObject **buf, int *offset, int x) { - char *s; + char *s; - CHECKSIZE(buf, offset, 4) - s = PyString_AS_STRING(*buf) + (*offset); - s[0] = (x & 0xff); - s[1] = (x >> 8) & 0xff; - s[2] = (x >> 16) & 0xff; - s[3] = (x >> 24) & 0xff; - *offset += 4; - return 1; + CHECKSIZE(buf, offset, 4) + s = PyString_AS_STRING(*buf) + (*offset); + s[0] = (x & 0xff); + s[1] = (x >> 8) & 0xff; + s[2] = (x >> 16) & 0xff; + s[3] = (x >> 24) & 0xff; + *offset += 4; + return 1; } -static int +static int marshal_write_bool(PyObject **buf, int *offset, bool b) { - if (b) - marshal_write_int(buf, offset, 1); - else - marshal_write_int(buf, offset, 0); - return 1; + if (b) + marshal_write_int(buf, offset, 1); + else + marshal_write_int(buf, offset, 0); + return 1; } -static int +static int marshal_write_identifier(PyObject **buf, int *offset, identifier id) { - int l = PyString_GET_SIZE(id); - marshal_write_int(buf, offset, l); - CHECKSIZE(buf, offset, l); - memcpy(PyString_AS_STRING(*buf) + *offset, - PyString_AS_STRING(id), l); - *offset += l; - return 1; + int l = PyString_GET_SIZE(id); + marshal_write_int(buf, offset, l); + CHECKSIZE(buf, offset, l); + memcpy(PyString_AS_STRING(*buf) + *offset, + PyString_AS_STRING(id), l); + *offset += l; + return 1; } -static int +static int marshal_write_string(PyObject **buf, int *offset, string s) { - int len = PyString_GET_SIZE(s); - marshal_write_int(buf, offset, len); - CHECKSIZE(buf, offset, len); - memcpy(PyString_AS_STRING(*buf) + *offset, - PyString_AS_STRING(s), len); - *offset += len; - return 1; + int len = PyString_GET_SIZE(s); + marshal_write_int(buf, 
offset, len); + CHECKSIZE(buf, offset, len); + memcpy(PyString_AS_STRING(*buf) + *offset, + PyString_AS_STRING(s), len); + *offset += len; + return 1; } -static int +static int marshal_write_object(PyObject **buf, int *offset, object s) { - /* XXX */ - return 0; + /* XXX */ + return 0; } ''' @@ -570,7 +575,7 @@ self.emit("return 1;", 1) self.emit("}", 0) self.emit("", 0) - + def visitSum(self, sum, name): self.func_begin(name, has_sequence(sum.types, False)) simple = is_simple(sum) @@ -589,7 +594,7 @@ for field in prod.fields: self.visitField(field, name, 1, 1) self.func_end() - + def visitConstructor(self, cons, enum, name, simple): if simple: self.emit("case %s:" % cons.name, 1) @@ -655,7 +660,7 @@ c = ChainOfVisitors(TypeDefVisitor(f), StructVisitor(f), PrototypeVisitor(f), - FreePrototypeVisitor(f), +## FreePrototypeVisitor(f), ) c.visit(mod) f.close() @@ -671,8 +676,8 @@ print >> f v = ChainOfVisitors(MarshalPrototypeVisitor(f), FunctionVisitor(f), - FreeUtilVisitor(f), - FreeVisitor(f), +## FreeUtilVisitor(f), +## FreeVisitor(f), MarshalUtilVisitor(f), MarshalFunctionVisitor(f), ) Modified: python/branches/ssize_t/Parser/grammar.c ============================================================================== --- python/branches/ssize_t/Parser/grammar.c (original) +++ python/branches/ssize_t/Parser/grammar.c Mon Jan 2 16:17:17 2006 @@ -180,7 +180,8 @@ } if (lb->lb_type == STRING) { - if (isalpha((int)(lb->lb_str[1])) || lb->lb_str[1] == '_') { + if (isalpha(Py_CHARMASK(lb->lb_str[1])) || + lb->lb_str[1] == '_') { char *p; char *src; char *dest; Modified: python/branches/ssize_t/Parser/spark.py ============================================================================== --- python/branches/ssize_t/Parser/spark.py (original) +++ python/branches/ssize_t/Parser/spark.py Mon Jan 2 16:17:17 2006 @@ -1,5 +1,5 @@ # Copyright (c) 1998-2002 John Aycock -# +# # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and 
associated documentation files (the # "Software"), to deal in the Software without restriction, including @@ -7,10 +7,10 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. @@ -26,651 +26,651 @@ import string def _namelist(instance): - namelist, namedict, classlist = [], {}, [instance.__class__] - for c in classlist: - for b in c.__bases__: - classlist.append(b) - for name in c.__dict__.keys(): - if not namedict.has_key(name): - namelist.append(name) - namedict[name] = 1 - return namelist + namelist, namedict, classlist = [], {}, [instance.__class__] + for c in classlist: + for b in c.__bases__: + classlist.append(b) + for name in c.__dict__.keys(): + if not namedict.has_key(name): + namelist.append(name) + namedict[name] = 1 + return namelist class GenericScanner: - def __init__(self, flags=0): - pattern = self.reflect() - self.re = re.compile(pattern, re.VERBOSE|flags) - - self.index2func = {} - for name, number in self.re.groupindex.items(): - self.index2func[number-1] = getattr(self, 't_' + name) - - def makeRE(self, name): - doc = getattr(self, name).__doc__ - rv = '(?P<%s>%s)' % (name[2:], doc) - return rv - - def reflect(self): - rv = [] - for name in _namelist(self): - if name[:2] == 't_' and name != 't_default': - rv.append(self.makeRE(name)) - - rv.append(self.makeRE('t_default')) - return string.join(rv, '|') - - def error(self, s, pos): - print "Lexical error at position %s" % pos - raise SystemExit - - def tokenize(self, s): - pos = 0 - n = len(s) - while pos < n: - m = self.re.match(s, 
pos) - if m is None: - self.error(s, pos) - - groups = m.groups() - for i in range(len(groups)): - if groups[i] and self.index2func.has_key(i): - self.index2func[i](groups[i]) - pos = m.end() - - def t_default(self, s): - r'( . | \n )+' - print "Specification error: unmatched input" - raise SystemExit + def __init__(self, flags=0): + pattern = self.reflect() + self.re = re.compile(pattern, re.VERBOSE|flags) + + self.index2func = {} + for name, number in self.re.groupindex.items(): + self.index2func[number-1] = getattr(self, 't_' + name) + + def makeRE(self, name): + doc = getattr(self, name).__doc__ + rv = '(?P<%s>%s)' % (name[2:], doc) + return rv + + def reflect(self): + rv = [] + for name in _namelist(self): + if name[:2] == 't_' and name != 't_default': + rv.append(self.makeRE(name)) + + rv.append(self.makeRE('t_default')) + return string.join(rv, '|') + + def error(self, s, pos): + print "Lexical error at position %s" % pos + raise SystemExit + + def tokenize(self, s): + pos = 0 + n = len(s) + while pos < n: + m = self.re.match(s, pos) + if m is None: + self.error(s, pos) + + groups = m.groups() + for i in range(len(groups)): + if groups[i] and self.index2func.has_key(i): + self.index2func[i](groups[i]) + pos = m.end() + + def t_default(self, s): + r'( . | \n )+' + print "Specification error: unmatched input" + raise SystemExit # # Extracted from GenericParser and made global so that [un]picking works. # class _State: - def __init__(self, stateno, items): - self.T, self.complete, self.items = [], [], items - self.stateno = stateno + def __init__(self, stateno, items): + self.T, self.complete, self.items = [], [], items + self.stateno = stateno class GenericParser: - # - # An Earley parser, as per J. Earley, "An Efficient Context-Free - # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley, - # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis, - # Carnegie-Mellon University, August 1968. New formulation of - # the parser according to J. 
Aycock, "Practical Earley Parsing - # and the SPARK Toolkit", Ph.D. thesis, University of Victoria, - # 2001, and J. Aycock and R. N. Horspool, "Practical Earley - # Parsing", unpublished paper, 2001. - # - - def __init__(self, start): - self.rules = {} - self.rule2func = {} - self.rule2name = {} - self.collectRules() - self.augment(start) - self.ruleschanged = 1 - - _NULLABLE = '\e_' - _START = 'START' - _BOF = '|-' - - # - # When pickling, take the time to generate the full state machine; - # some information is then extraneous, too. Unfortunately we - # can't save the rule2func map. - # - def __getstate__(self): - if self.ruleschanged: - # - # XXX - duplicated from parse() - # - self.computeNull() - self.newrules = {} - self.new2old = {} - self.makeNewRules() - self.ruleschanged = 0 - self.edges, self.cores = {}, {} - self.states = { 0: self.makeState0() } - self.makeState(0, self._BOF) - # - # XXX - should find a better way to do this.. - # - changes = 1 - while changes: - changes = 0 - for k, v in self.edges.items(): - if v is None: - state, sym = k - if self.states.has_key(state): - self.goto(state, sym) - changes = 1 - rv = self.__dict__.copy() - for s in self.states.values(): - del s.items - del rv['rule2func'] - del rv['nullable'] - del rv['cores'] - return rv - - def __setstate__(self, D): - self.rules = {} - self.rule2func = {} - self.rule2name = {} - self.collectRules() - start = D['rules'][self._START][0][1][1] # Blech. - self.augment(start) - D['rule2func'] = self.rule2func - D['makeSet'] = self.makeSet_fast - self.__dict__ = D - - # - # A hook for GenericASTBuilder and GenericASTMatcher. Mess - # thee not with this; nor shall thee toucheth the _preprocess - # argument to addRule. 
- # - def preprocess(self, rule, func): return rule, func - - def addRule(self, doc, func, _preprocess=1): - fn = func - rules = string.split(doc) - - index = [] - for i in range(len(rules)): - if rules[i] == '::=': - index.append(i-1) - index.append(len(rules)) - - for i in range(len(index)-1): - lhs = rules[index[i]] - rhs = rules[index[i]+2:index[i+1]] - rule = (lhs, tuple(rhs)) - - if _preprocess: - rule, fn = self.preprocess(rule, func) - - if self.rules.has_key(lhs): - self.rules[lhs].append(rule) - else: - self.rules[lhs] = [ rule ] - self.rule2func[rule] = fn - self.rule2name[rule] = func.__name__[2:] - self.ruleschanged = 1 - - def collectRules(self): - for name in _namelist(self): - if name[:2] == 'p_': - func = getattr(self, name) - doc = func.__doc__ - self.addRule(doc, func) - - def augment(self, start): - rule = '%s ::= %s %s' % (self._START, self._BOF, start) - self.addRule(rule, lambda args: args[1], 0) - - def computeNull(self): - self.nullable = {} - tbd = [] - - for rulelist in self.rules.values(): - lhs = rulelist[0][0] - self.nullable[lhs] = 0 - for rule in rulelist: - rhs = rule[1] - if len(rhs) == 0: - self.nullable[lhs] = 1 - continue - # - # We only need to consider rules which - # consist entirely of nonterminal symbols. - # This should be a savings on typical - # grammars. - # - for sym in rhs: - if not self.rules.has_key(sym): - break - else: - tbd.append(rule) - changes = 1 - while changes: - changes = 0 - for lhs, rhs in tbd: - if self.nullable[lhs]: - continue - for sym in rhs: - if not self.nullable[sym]: - break - else: - self.nullable[lhs] = 1 - changes = 1 - - def makeState0(self): - s0 = _State(0, []) - for rule in self.newrules[self._START]: - s0.items.append((rule, 0)) - return s0 - - def finalState(self, tokens): - # - # Yuck. 
- # - if len(self.newrules[self._START]) == 2 and len(tokens) == 0: - return 1 - start = self.rules[self._START][0][1][1] - return self.goto(1, start) - - def makeNewRules(self): - worklist = [] - for rulelist in self.rules.values(): - for rule in rulelist: - worklist.append((rule, 0, 1, rule)) - - for rule, i, candidate, oldrule in worklist: - lhs, rhs = rule - n = len(rhs) - while i < n: - sym = rhs[i] - if not self.rules.has_key(sym) or \ - not self.nullable[sym]: - candidate = 0 - i = i + 1 - continue - - newrhs = list(rhs) - newrhs[i] = self._NULLABLE+sym - newrule = (lhs, tuple(newrhs)) - worklist.append((newrule, i+1, - candidate, oldrule)) - candidate = 0 - i = i + 1 - else: - if candidate: - lhs = self._NULLABLE+lhs - rule = (lhs, rhs) - if self.newrules.has_key(lhs): - self.newrules[lhs].append(rule) - else: - self.newrules[lhs] = [ rule ] - self.new2old[rule] = oldrule - - def typestring(self, token): - return None - - def error(self, token): - print "Syntax error at or near `%s' token" % token - raise SystemExit - - def parse(self, tokens): - sets = [ [(1,0), (2,0)] ] - self.links = {} - - if self.ruleschanged: - self.computeNull() - self.newrules = {} - self.new2old = {} - self.makeNewRules() - self.ruleschanged = 0 - self.edges, self.cores = {}, {} - self.states = { 0: self.makeState0() } - self.makeState(0, self._BOF) - - for i in xrange(len(tokens)): - sets.append([]) - - if sets[i] == []: - break - self.makeSet(tokens[i], sets, i) - else: - sets.append([]) - self.makeSet(None, sets, len(tokens)) - - #_dump(tokens, sets, self.states) - - finalitem = (self.finalState(tokens), 0) - if finalitem not in sets[-2]: - if len(tokens) > 0: - self.error(tokens[i-1]) - else: - self.error(None) - - return self.buildTree(self._START, finalitem, - tokens, len(sets)-2) - - def isnullable(self, sym): - # - # For symbols in G_e only. If we weren't supporting 1.5, - # could just use sym.startswith(). 
- # - return self._NULLABLE == sym[0:len(self._NULLABLE)] - - def skip(self, (lhs, rhs), pos=0): - n = len(rhs) - while pos < n: - if not self.isnullable(rhs[pos]): - break - pos = pos + 1 - return pos - - def makeState(self, state, sym): - assert sym is not None - # - # Compute \epsilon-kernel state's core and see if - # it exists already. - # - kitems = [] - for rule, pos in self.states[state].items: - lhs, rhs = rule - if rhs[pos:pos+1] == (sym,): - kitems.append((rule, self.skip(rule, pos+1))) - core = kitems - - core.sort() - tcore = tuple(core) - if self.cores.has_key(tcore): - return self.cores[tcore] - # - # Nope, doesn't exist. Compute it and the associated - # \epsilon-nonkernel state together; we'll need it right away. - # - k = self.cores[tcore] = len(self.states) - K, NK = _State(k, kitems), _State(k+1, []) - self.states[k] = K - predicted = {} - - edges = self.edges - rules = self.newrules - for X in K, NK: - worklist = X.items - for item in worklist: - rule, pos = item - lhs, rhs = rule - if pos == len(rhs): - X.complete.append(rule) - continue - - nextSym = rhs[pos] - key = (X.stateno, nextSym) - if not rules.has_key(nextSym): - if not edges.has_key(key): - edges[key] = None - X.T.append(nextSym) - else: - edges[key] = None - if not predicted.has_key(nextSym): - predicted[nextSym] = 1 - for prule in rules[nextSym]: - ppos = self.skip(prule) - new = (prule, ppos) - NK.items.append(new) - # - # Problem: we know K needs generating, but we - # don't yet know about NK. Can't commit anything - # regarding NK to self.edges until we're sure. Should - # we delay committing on both K and NK to avoid this - # hacky code? This creates other problems.. - # - if X is K: - edges = {} - - if NK.items == []: - return k - - # - # Check for \epsilon-nonkernel's core. Unfortunately we - # need to know the entire set of predicted nonterminals - # to do this without accidentally duplicating states. 
- # - core = predicted.keys() - core.sort() - tcore = tuple(core) - if self.cores.has_key(tcore): - self.edges[(k, None)] = self.cores[tcore] - return k - - nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno - self.edges.update(edges) - self.states[nk] = NK - return k - - def goto(self, state, sym): - key = (state, sym) - if not self.edges.has_key(key): - # - # No transitions from state on sym. - # - return None - - rv = self.edges[key] - if rv is None: - # - # Target state isn't generated yet. Remedy this. - # - rv = self.makeState(state, sym) - self.edges[key] = rv - return rv - - def gotoT(self, state, t): - return [self.goto(state, t)] - - def gotoST(self, state, st): - rv = [] - for t in self.states[state].T: - if st == t: - rv.append(self.goto(state, t)) - return rv - - def add(self, set, item, i=None, predecessor=None, causal=None): - if predecessor is None: - if item not in set: - set.append(item) - else: - key = (item, i) - if item not in set: - self.links[key] = [] - set.append(item) - self.links[key].append((predecessor, causal)) - - def makeSet(self, token, sets, i): - cur, next = sets[i], sets[i+1] - - ttype = token is not None and self.typestring(token) or None - if ttype is not None: - fn, arg = self.gotoT, ttype - else: - fn, arg = self.gotoST, token - - for item in cur: - ptr = (item, i) - state, parent = item - add = fn(state, arg) - for k in add: - if k is not None: - self.add(next, (k, parent), i+1, ptr) - nk = self.goto(k, None) - if nk is not None: - self.add(next, (nk, i+1)) - - if parent == i: - continue - - for rule in self.states[state].complete: - lhs, rhs = rule - for pitem in sets[parent]: - pstate, pparent = pitem - k = self.goto(pstate, lhs) - if k is not None: - why = (item, i, rule) - pptr = (pitem, parent) - self.add(cur, (k, pparent), - i, pptr, why) - nk = self.goto(k, None) - if nk is not None: - self.add(cur, (nk, i)) - - def makeSet_fast(self, token, sets, i): - # - # Call *only* when the entire state machine has been 
built! - # It relies on self.edges being filled in completely, and - # then duplicates and inlines code to boost speed at the - # cost of extreme ugliness. - # - cur, next = sets[i], sets[i+1] - ttype = token is not None and self.typestring(token) or None - - for item in cur: - ptr = (item, i) - state, parent = item - if ttype is not None: - k = self.edges.get((state, ttype), None) - if k is not None: - #self.add(next, (k, parent), i+1, ptr) - #INLINED --v - new = (k, parent) - key = (new, i+1) - if new not in next: - self.links[key] = [] - next.append(new) - self.links[key].append((ptr, None)) - #INLINED --^ - #nk = self.goto(k, None) - nk = self.edges.get((k, None), None) - if nk is not None: - #self.add(next, (nk, i+1)) - #INLINED --v - new = (nk, i+1) - if new not in next: - next.append(new) - #INLINED --^ - else: - add = self.gotoST(state, token) - for k in add: - if k is not None: - self.add(next, (k, parent), i+1, ptr) - #nk = self.goto(k, None) - nk = self.edges.get((k, None), None) - if nk is not None: - self.add(next, (nk, i+1)) - - if parent == i: - continue - - for rule in self.states[state].complete: - lhs, rhs = rule - for pitem in sets[parent]: - pstate, pparent = pitem - #k = self.goto(pstate, lhs) - k = self.edges.get((pstate, lhs), None) - if k is not None: - why = (item, i, rule) - pptr = (pitem, parent) - #self.add(cur, (k, pparent), - # i, pptr, why) - #INLINED --v - new = (k, pparent) - key = (new, i) - if new not in cur: - self.links[key] = [] - cur.append(new) - self.links[key].append((pptr, why)) - #INLINED --^ - #nk = self.goto(k, None) - nk = self.edges.get((k, None), None) - if nk is not None: - #self.add(cur, (nk, i)) - #INLINED --v - new = (nk, i) - if new not in cur: - cur.append(new) - #INLINED --^ - - def predecessor(self, key, causal): - for p, c in self.links[key]: - if c == causal: - return p - assert 0 - - def causal(self, key): - links = self.links[key] - if len(links) == 1: - return links[0][1] - choices = [] - rule2cause = {} 
- for p, c in links: - rule = c[2] - choices.append(rule) - rule2cause[rule] = c - return rule2cause[self.ambiguity(choices)] - - def deriveEpsilon(self, nt): - if len(self.newrules[nt]) > 1: - rule = self.ambiguity(self.newrules[nt]) - else: - rule = self.newrules[nt][0] - #print rule - - rhs = rule[1] - attr = [None] * len(rhs) - - for i in range(len(rhs)-1, -1, -1): - attr[i] = self.deriveEpsilon(rhs[i]) - return self.rule2func[self.new2old[rule]](attr) - - def buildTree(self, nt, item, tokens, k): - state, parent = item - - choices = [] - for rule in self.states[state].complete: - if rule[0] == nt: - choices.append(rule) - rule = choices[0] - if len(choices) > 1: - rule = self.ambiguity(choices) - #print rule - - rhs = rule[1] - attr = [None] * len(rhs) - - for i in range(len(rhs)-1, -1, -1): - sym = rhs[i] - if not self.newrules.has_key(sym): - if sym != self._BOF: - attr[i] = tokens[k-1] - key = (item, k) - item, k = self.predecessor(key, None) - #elif self.isnullable(sym): - elif self._NULLABLE == sym[0:len(self._NULLABLE)]: - attr[i] = self.deriveEpsilon(sym) - else: - key = (item, k) - why = self.causal(key) - attr[i] = self.buildTree(sym, why[0], - tokens, why[1]) - item, k = self.predecessor(key, why) - return self.rule2func[self.new2old[rule]](attr) - - def ambiguity(self, rules): - # - # XXX - problem here and in collectRules() if the same rule - # appears in >1 method. Also undefined results if rules - # causing the ambiguity appear in the same method. - # - sortlist = [] - name2index = {} - for i in range(len(rules)): - lhs, rhs = rule = rules[i] - name = self.rule2name[self.new2old[rule]] - sortlist.append((len(rhs), name)) - name2index[name] = i - sortlist.sort() - list = map(lambda (a,b): b, sortlist) - return rules[name2index[self.resolve(list)]] - - def resolve(self, list): - # - # Resolve ambiguity in favor of the shortest RHS. - # Since we walk the tree from the top down, this - # should effectively resolve in favor of a "shift". 
- # - return list[0] + # + # An Earley parser, as per J. Earley, "An Efficient Context-Free + # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley, + # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis, + # Carnegie-Mellon University, August 1968. New formulation of + # the parser according to J. Aycock, "Practical Earley Parsing + # and the SPARK Toolkit", Ph.D. thesis, University of Victoria, + # 2001, and J. Aycock and R. N. Horspool, "Practical Earley + # Parsing", unpublished paper, 2001. + # + + def __init__(self, start): + self.rules = {} + self.rule2func = {} + self.rule2name = {} + self.collectRules() + self.augment(start) + self.ruleschanged = 1 + + _NULLABLE = '\e_' + _START = 'START' + _BOF = '|-' + + # + # When pickling, take the time to generate the full state machine; + # some information is then extraneous, too. Unfortunately we + # can't save the rule2func map. + # + def __getstate__(self): + if self.ruleschanged: + # + # XXX - duplicated from parse() + # + self.computeNull() + self.newrules = {} + self.new2old = {} + self.makeNewRules() + self.ruleschanged = 0 + self.edges, self.cores = {}, {} + self.states = { 0: self.makeState0() } + self.makeState(0, self._BOF) + # + # XXX - should find a better way to do this.. + # + changes = 1 + while changes: + changes = 0 + for k, v in self.edges.items(): + if v is None: + state, sym = k + if self.states.has_key(state): + self.goto(state, sym) + changes = 1 + rv = self.__dict__.copy() + for s in self.states.values(): + del s.items + del rv['rule2func'] + del rv['nullable'] + del rv['cores'] + return rv + + def __setstate__(self, D): + self.rules = {} + self.rule2func = {} + self.rule2name = {} + self.collectRules() + start = D['rules'][self._START][0][1][1] # Blech. + self.augment(start) + D['rule2func'] = self.rule2func + D['makeSet'] = self.makeSet_fast + self.__dict__ = D + + # + # A hook for GenericASTBuilder and GenericASTMatcher. 
Mess + # thee not with this; nor shall thee toucheth the _preprocess + # argument to addRule. + # + def preprocess(self, rule, func): return rule, func + + def addRule(self, doc, func, _preprocess=1): + fn = func + rules = string.split(doc) + + index = [] + for i in range(len(rules)): + if rules[i] == '::=': + index.append(i-1) + index.append(len(rules)) + + for i in range(len(index)-1): + lhs = rules[index[i]] + rhs = rules[index[i]+2:index[i+1]] + rule = (lhs, tuple(rhs)) + + if _preprocess: + rule, fn = self.preprocess(rule, func) + + if self.rules.has_key(lhs): + self.rules[lhs].append(rule) + else: + self.rules[lhs] = [ rule ] + self.rule2func[rule] = fn + self.rule2name[rule] = func.__name__[2:] + self.ruleschanged = 1 + + def collectRules(self): + for name in _namelist(self): + if name[:2] == 'p_': + func = getattr(self, name) + doc = func.__doc__ + self.addRule(doc, func) + + def augment(self, start): + rule = '%s ::= %s %s' % (self._START, self._BOF, start) + self.addRule(rule, lambda args: args[1], 0) + + def computeNull(self): + self.nullable = {} + tbd = [] + + for rulelist in self.rules.values(): + lhs = rulelist[0][0] + self.nullable[lhs] = 0 + for rule in rulelist: + rhs = rule[1] + if len(rhs) == 0: + self.nullable[lhs] = 1 + continue + # + # We only need to consider rules which + # consist entirely of nonterminal symbols. + # This should be a savings on typical + # grammars. + # + for sym in rhs: + if not self.rules.has_key(sym): + break + else: + tbd.append(rule) + changes = 1 + while changes: + changes = 0 + for lhs, rhs in tbd: + if self.nullable[lhs]: + continue + for sym in rhs: + if not self.nullable[sym]: + break + else: + self.nullable[lhs] = 1 + changes = 1 + + def makeState0(self): + s0 = _State(0, []) + for rule in self.newrules[self._START]: + s0.items.append((rule, 0)) + return s0 + + def finalState(self, tokens): + # + # Yuck. 
+ # + if len(self.newrules[self._START]) == 2 and len(tokens) == 0: + return 1 + start = self.rules[self._START][0][1][1] + return self.goto(1, start) + + def makeNewRules(self): + worklist = [] + for rulelist in self.rules.values(): + for rule in rulelist: + worklist.append((rule, 0, 1, rule)) + + for rule, i, candidate, oldrule in worklist: + lhs, rhs = rule + n = len(rhs) + while i < n: + sym = rhs[i] + if not self.rules.has_key(sym) or \ + not self.nullable[sym]: + candidate = 0 + i = i + 1 + continue + + newrhs = list(rhs) + newrhs[i] = self._NULLABLE+sym + newrule = (lhs, tuple(newrhs)) + worklist.append((newrule, i+1, + candidate, oldrule)) + candidate = 0 + i = i + 1 + else: + if candidate: + lhs = self._NULLABLE+lhs + rule = (lhs, rhs) + if self.newrules.has_key(lhs): + self.newrules[lhs].append(rule) + else: + self.newrules[lhs] = [ rule ] + self.new2old[rule] = oldrule + + def typestring(self, token): + return None + + def error(self, token): + print "Syntax error at or near `%s' token" % token + raise SystemExit + + def parse(self, tokens): + sets = [ [(1,0), (2,0)] ] + self.links = {} + + if self.ruleschanged: + self.computeNull() + self.newrules = {} + self.new2old = {} + self.makeNewRules() + self.ruleschanged = 0 + self.edges, self.cores = {}, {} + self.states = { 0: self.makeState0() } + self.makeState(0, self._BOF) + + for i in xrange(len(tokens)): + sets.append([]) + + if sets[i] == []: + break + self.makeSet(tokens[i], sets, i) + else: + sets.append([]) + self.makeSet(None, sets, len(tokens)) + + #_dump(tokens, sets, self.states) + + finalitem = (self.finalState(tokens), 0) + if finalitem not in sets[-2]: + if len(tokens) > 0: + self.error(tokens[i-1]) + else: + self.error(None) + + return self.buildTree(self._START, finalitem, + tokens, len(sets)-2) + + def isnullable(self, sym): + # + # For symbols in G_e only. If we weren't supporting 1.5, + # could just use sym.startswith(). 
+ # + return self._NULLABLE == sym[0:len(self._NULLABLE)] + + def skip(self, (lhs, rhs), pos=0): + n = len(rhs) + while pos < n: + if not self.isnullable(rhs[pos]): + break + pos = pos + 1 + return pos + + def makeState(self, state, sym): + assert sym is not None + # + # Compute \epsilon-kernel state's core and see if + # it exists already. + # + kitems = [] + for rule, pos in self.states[state].items: + lhs, rhs = rule + if rhs[pos:pos+1] == (sym,): + kitems.append((rule, self.skip(rule, pos+1))) + core = kitems + + core.sort() + tcore = tuple(core) + if self.cores.has_key(tcore): + return self.cores[tcore] + # + # Nope, doesn't exist. Compute it and the associated + # \epsilon-nonkernel state together; we'll need it right away. + # + k = self.cores[tcore] = len(self.states) + K, NK = _State(k, kitems), _State(k+1, []) + self.states[k] = K + predicted = {} + + edges = self.edges + rules = self.newrules + for X in K, NK: + worklist = X.items + for item in worklist: + rule, pos = item + lhs, rhs = rule + if pos == len(rhs): + X.complete.append(rule) + continue + + nextSym = rhs[pos] + key = (X.stateno, nextSym) + if not rules.has_key(nextSym): + if not edges.has_key(key): + edges[key] = None + X.T.append(nextSym) + else: + edges[key] = None + if not predicted.has_key(nextSym): + predicted[nextSym] = 1 + for prule in rules[nextSym]: + ppos = self.skip(prule) + new = (prule, ppos) + NK.items.append(new) + # + # Problem: we know K needs generating, but we + # don't yet know about NK. Can't commit anything + # regarding NK to self.edges until we're sure. Should + # we delay committing on both K and NK to avoid this + # hacky code? This creates other problems.. + # + if X is K: + edges = {} + + if NK.items == []: + return k + + # + # Check for \epsilon-nonkernel's core. Unfortunately we + # need to know the entire set of predicted nonterminals + # to do this without accidentally duplicating states. 
+ # + core = predicted.keys() + core.sort() + tcore = tuple(core) + if self.cores.has_key(tcore): + self.edges[(k, None)] = self.cores[tcore] + return k + + nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno + self.edges.update(edges) + self.states[nk] = NK + return k + + def goto(self, state, sym): + key = (state, sym) + if not self.edges.has_key(key): + # + # No transitions from state on sym. + # + return None + + rv = self.edges[key] + if rv is None: + # + # Target state isn't generated yet. Remedy this. + # + rv = self.makeState(state, sym) + self.edges[key] = rv + return rv + + def gotoT(self, state, t): + return [self.goto(state, t)] + + def gotoST(self, state, st): + rv = [] + for t in self.states[state].T: + if st == t: + rv.append(self.goto(state, t)) + return rv + + def add(self, set, item, i=None, predecessor=None, causal=None): + if predecessor is None: + if item not in set: + set.append(item) + else: + key = (item, i) + if item not in set: + self.links[key] = [] + set.append(item) + self.links[key].append((predecessor, causal)) + + def makeSet(self, token, sets, i): + cur, next = sets[i], sets[i+1] + + ttype = token is not None and self.typestring(token) or None + if ttype is not None: + fn, arg = self.gotoT, ttype + else: + fn, arg = self.gotoST, token + + for item in cur: + ptr = (item, i) + state, parent = item + add = fn(state, arg) + for k in add: + if k is not None: + self.add(next, (k, parent), i+1, ptr) + nk = self.goto(k, None) + if nk is not None: + self.add(next, (nk, i+1)) + + if parent == i: + continue + + for rule in self.states[state].complete: + lhs, rhs = rule + for pitem in sets[parent]: + pstate, pparent = pitem + k = self.goto(pstate, lhs) + if k is not None: + why = (item, i, rule) + pptr = (pitem, parent) + self.add(cur, (k, pparent), + i, pptr, why) + nk = self.goto(k, None) + if nk is not None: + self.add(cur, (nk, i)) + + def makeSet_fast(self, token, sets, i): + # + # Call *only* when the entire state machine has been 
built! + # It relies on self.edges being filled in completely, and + # then duplicates and inlines code to boost speed at the + # cost of extreme ugliness. + # + cur, next = sets[i], sets[i+1] + ttype = token is not None and self.typestring(token) or None + + for item in cur: + ptr = (item, i) + state, parent = item + if ttype is not None: + k = self.edges.get((state, ttype), None) + if k is not None: + #self.add(next, (k, parent), i+1, ptr) + #INLINED --v + new = (k, parent) + key = (new, i+1) + if new not in next: + self.links[key] = [] + next.append(new) + self.links[key].append((ptr, None)) + #INLINED --^ + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + #self.add(next, (nk, i+1)) + #INLINED --v + new = (nk, i+1) + if new not in next: + next.append(new) + #INLINED --^ + else: + add = self.gotoST(state, token) + for k in add: + if k is not None: + self.add(next, (k, parent), i+1, ptr) + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + self.add(next, (nk, i+1)) + + if parent == i: + continue + + for rule in self.states[state].complete: + lhs, rhs = rule + for pitem in sets[parent]: + pstate, pparent = pitem + #k = self.goto(pstate, lhs) + k = self.edges.get((pstate, lhs), None) + if k is not None: + why = (item, i, rule) + pptr = (pitem, parent) + #self.add(cur, (k, pparent), + # i, pptr, why) + #INLINED --v + new = (k, pparent) + key = (new, i) + if new not in cur: + self.links[key] = [] + cur.append(new) + self.links[key].append((pptr, why)) + #INLINED --^ + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + #self.add(cur, (nk, i)) + #INLINED --v + new = (nk, i) + if new not in cur: + cur.append(new) + #INLINED --^ + + def predecessor(self, key, causal): + for p, c in self.links[key]: + if c == causal: + return p + assert 0 + + def causal(self, key): + links = self.links[key] + if len(links) == 1: + return links[0][1] + choices = [] + rule2cause = {} 
+ for p, c in links: + rule = c[2] + choices.append(rule) + rule2cause[rule] = c + return rule2cause[self.ambiguity(choices)] + + def deriveEpsilon(self, nt): + if len(self.newrules[nt]) > 1: + rule = self.ambiguity(self.newrules[nt]) + else: + rule = self.newrules[nt][0] + #print rule + + rhs = rule[1] + attr = [None] * len(rhs) + + for i in range(len(rhs)-1, -1, -1): + attr[i] = self.deriveEpsilon(rhs[i]) + return self.rule2func[self.new2old[rule]](attr) + + def buildTree(self, nt, item, tokens, k): + state, parent = item + + choices = [] + for rule in self.states[state].complete: + if rule[0] == nt: + choices.append(rule) + rule = choices[0] + if len(choices) > 1: + rule = self.ambiguity(choices) + #print rule + + rhs = rule[1] + attr = [None] * len(rhs) + + for i in range(len(rhs)-1, -1, -1): + sym = rhs[i] + if not self.newrules.has_key(sym): + if sym != self._BOF: + attr[i] = tokens[k-1] + key = (item, k) + item, k = self.predecessor(key, None) + #elif self.isnullable(sym): + elif self._NULLABLE == sym[0:len(self._NULLABLE)]: + attr[i] = self.deriveEpsilon(sym) + else: + key = (item, k) + why = self.causal(key) + attr[i] = self.buildTree(sym, why[0], + tokens, why[1]) + item, k = self.predecessor(key, why) + return self.rule2func[self.new2old[rule]](attr) + + def ambiguity(self, rules): + # + # XXX - problem here and in collectRules() if the same rule + # appears in >1 method. Also undefined results if rules + # causing the ambiguity appear in the same method. + # + sortlist = [] + name2index = {} + for i in range(len(rules)): + lhs, rhs = rule = rules[i] + name = self.rule2name[self.new2old[rule]] + sortlist.append((len(rhs), name)) + name2index[name] = i + sortlist.sort() + list = map(lambda (a,b): b, sortlist) + return rules[name2index[self.resolve(list)]] + + def resolve(self, list): + # + # Resolve ambiguity in favor of the shortest RHS. + # Since we walk the tree from the top down, this + # should effectively resolve in favor of a "shift". 
+ # + return list[0] # # GenericASTBuilder automagically constructs a concrete/abstract syntax tree @@ -681,32 +681,32 @@ # class GenericASTBuilder(GenericParser): - def __init__(self, AST, start): - GenericParser.__init__(self, start) - self.AST = AST - - def preprocess(self, rule, func): - rebind = lambda lhs, self=self: \ - lambda args, lhs=lhs, self=self: \ - self.buildASTNode(args, lhs) - lhs, rhs = rule - return rule, rebind(lhs) - - def buildASTNode(self, args, lhs): - children = [] - for arg in args: - if isinstance(arg, self.AST): - children.append(arg) - else: - children.append(self.terminal(arg)) - return self.nonterminal(lhs, children) - - def terminal(self, token): return token - - def nonterminal(self, type, args): - rv = self.AST(type) - rv[:len(args)] = args - return rv + def __init__(self, AST, start): + GenericParser.__init__(self, start) + self.AST = AST + + def preprocess(self, rule, func): + rebind = lambda lhs, self=self: \ + lambda args, lhs=lhs, self=self: \ + self.buildASTNode(args, lhs) + lhs, rhs = rule + return rule, rebind(lhs) + + def buildASTNode(self, args, lhs): + children = [] + for arg in args: + if isinstance(arg, self.AST): + children.append(arg) + else: + children.append(self.terminal(arg)) + return self.nonterminal(lhs, children) + + def terminal(self, token): return token + + def nonterminal(self, type, args): + rv = self.AST(type) + rv[:len(args)] = args + return rv # # GenericASTTraversal is a Visitor pattern according to Design Patterns. 
For @@ -719,57 +719,57 @@ # class GenericASTTraversalPruningException: - pass + pass class GenericASTTraversal: - def __init__(self, ast): - self.ast = ast + def __init__(self, ast): + self.ast = ast - def typestring(self, node): - return node.type + def typestring(self, node): + return node.type - def prune(self): - raise GenericASTTraversalPruningException + def prune(self): + raise GenericASTTraversalPruningException - def preorder(self, node=None): - if node is None: - node = self.ast - - try: - name = 'n_' + self.typestring(node) - if hasattr(self, name): - func = getattr(self, name) - func(node) - else: - self.default(node) - except GenericASTTraversalPruningException: - return - - for kid in node: - self.preorder(kid) - - name = name + '_exit' - if hasattr(self, name): - func = getattr(self, name) - func(node) - - def postorder(self, node=None): - if node is None: - node = self.ast - - for kid in node: - self.postorder(kid) - - name = 'n_' + self.typestring(node) - if hasattr(self, name): - func = getattr(self, name) - func(node) - else: - self.default(node) + def preorder(self, node=None): + if node is None: + node = self.ast + + try: + name = 'n_' + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + func(node) + else: + self.default(node) + except GenericASTTraversalPruningException: + return + + for kid in node: + self.preorder(kid) + + name = name + '_exit' + if hasattr(self, name): + func = getattr(self, name) + func(node) + + def postorder(self, node=None): + if node is None: + node = self.ast + + for kid in node: + self.postorder(kid) + + name = 'n_' + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + func(node) + else: + self.default(node) - def default(self, node): - pass + def default(self, node): + pass # # GenericASTMatcher. 
AST nodes must have "__getitem__" and "__cmp__" @@ -779,62 +779,62 @@ # class GenericASTMatcher(GenericParser): - def __init__(self, start, ast): - GenericParser.__init__(self, start) - self.ast = ast - - def preprocess(self, rule, func): - rebind = lambda func, self=self: \ - lambda args, func=func, self=self: \ - self.foundMatch(args, func) - lhs, rhs = rule - rhslist = list(rhs) - rhslist.reverse() - - return (lhs, tuple(rhslist)), rebind(func) - - def foundMatch(self, args, func): - func(args[-1]) - return args[-1] - - def match_r(self, node): - self.input.insert(0, node) - children = 0 - - for child in node: - if children == 0: - self.input.insert(0, '(') - children = children + 1 - self.match_r(child) - - if children > 0: - self.input.insert(0, ')') - - def match(self, ast=None): - if ast is None: - ast = self.ast - self.input = [] - - self.match_r(ast) - self.parse(self.input) - - def resolve(self, list): - # - # Resolve ambiguity in favor of the longest RHS. - # - return list[-1] + def __init__(self, start, ast): + GenericParser.__init__(self, start) + self.ast = ast + + def preprocess(self, rule, func): + rebind = lambda func, self=self: \ + lambda args, func=func, self=self: \ + self.foundMatch(args, func) + lhs, rhs = rule + rhslist = list(rhs) + rhslist.reverse() + + return (lhs, tuple(rhslist)), rebind(func) + + def foundMatch(self, args, func): + func(args[-1]) + return args[-1] + + def match_r(self, node): + self.input.insert(0, node) + children = 0 + + for child in node: + if children == 0: + self.input.insert(0, '(') + children = children + 1 + self.match_r(child) + + if children > 0: + self.input.insert(0, ')') + + def match(self, ast=None): + if ast is None: + ast = self.ast + self.input = [] + + self.match_r(ast) + self.parse(self.input) + + def resolve(self, list): + # + # Resolve ambiguity in favor of the longest RHS. 
+ # + return list[-1] def _dump(tokens, sets, states): - for i in range(len(sets)): - print 'set', i - for item in sets[i]: - print '\t', item - for (lhs, rhs), pos in states[item[0]].items: - print '\t\t', lhs, '::=', - print string.join(rhs[:pos]), - print '.', - print string.join(rhs[pos:]) - if i < len(tokens): - print - print 'token', str(tokens[i]) - print + for i in range(len(sets)): + print 'set', i + for item in sets[i]: + print '\t', item + for (lhs, rhs), pos in states[item[0]].items: + print '\t\t', lhs, '::=', + print string.join(rhs[:pos]), + print '.', + print string.join(rhs[pos:]) + if i < len(tokens): + print + print 'token', str(tokens[i]) + print Modified: python/branches/ssize_t/Parser/tokenizer.c ============================================================================== --- python/branches/ssize_t/Parser/tokenizer.c (original) +++ python/branches/ssize_t/Parser/tokenizer.c Mon Jan 2 16:17:17 2006 @@ -229,7 +229,7 @@ } while (t[0] == '\x20' || t[0] == '\t'); begin = t; - while (isalnum((int)t[0]) || + while (isalnum(Py_CHARMASK(t[0])) || t[0] == '-' || t[0] == '_' || t[0] == '.') t++; @@ -292,6 +292,12 @@ PyMem_DEL(cs); } } + if (!r) { + cs = tok->encoding; + if (!cs) + cs = "with BOM"; + PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); + } return r; } Modified: python/branches/ssize_t/Python/Python-ast.c ============================================================================== --- python/branches/ssize_t/Python/Python-ast.c (original) +++ python/branches/ssize_t/Python/Python-ast.c Mon Jan 2 16:17:17 2006 @@ -19,10 +19,10 @@ static int marshal_write_alias(PyObject **, int *, alias_ty); mod_ty -Module(asdl_seq * body) +Module(asdl_seq * body, PyArena *arena) { mod_ty p; - p = (mod_ty)malloc(sizeof(*p)); + p = (mod_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -33,10 +33,10 @@ } mod_ty -Interactive(asdl_seq * body) +Interactive(asdl_seq * body, PyArena *arena) { mod_ty p; - p = 
(mod_ty)malloc(sizeof(*p)); + p = (mod_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -47,7 +47,7 @@ } mod_ty -Expression(expr_ty body) +Expression(expr_ty body, PyArena *arena) { mod_ty p; if (!body) { @@ -55,7 +55,7 @@ "field body is required for Expression"); return NULL; } - p = (mod_ty)malloc(sizeof(*p)); + p = (mod_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -66,10 +66,10 @@ } mod_ty -Suite(asdl_seq * body) +Suite(asdl_seq * body, PyArena *arena) { mod_ty p; - p = (mod_ty)malloc(sizeof(*p)); + p = (mod_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -81,7 +81,7 @@ stmt_ty FunctionDef(identifier name, arguments_ty args, asdl_seq * body, asdl_seq * - decorators, int lineno) + decorators, int lineno, PyArena *arena) { stmt_ty p; if (!name) { @@ -94,7 +94,7 @@ "field args is required for FunctionDef"); return NULL; } - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -109,7 +109,8 @@ } stmt_ty -ClassDef(identifier name, asdl_seq * bases, asdl_seq * body, int lineno) +ClassDef(identifier name, asdl_seq * bases, asdl_seq * body, int lineno, + PyArena *arena) { stmt_ty p; if (!name) { @@ -117,7 +118,7 @@ "field name is required for ClassDef"); return NULL; } - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -131,10 +132,10 @@ } stmt_ty -Return(expr_ty value, int lineno) +Return(expr_ty value, int lineno, PyArena *arena) { stmt_ty p; - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -146,10 +147,10 @@ } stmt_ty -Delete(asdl_seq * targets, int lineno) +Delete(asdl_seq * targets, int lineno, PyArena *arena) { stmt_ty p; - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); 
return NULL; @@ -161,7 +162,7 @@ } stmt_ty -Assign(asdl_seq * targets, expr_ty value, int lineno) +Assign(asdl_seq * targets, expr_ty value, int lineno, PyArena *arena) { stmt_ty p; if (!value) { @@ -169,7 +170,7 @@ "field value is required for Assign"); return NULL; } - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -182,7 +183,8 @@ } stmt_ty -AugAssign(expr_ty target, operator_ty op, expr_ty value, int lineno) +AugAssign(expr_ty target, operator_ty op, expr_ty value, int lineno, PyArena + *arena) { stmt_ty p; if (!target) { @@ -200,7 +202,7 @@ "field value is required for AugAssign"); return NULL; } - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -214,10 +216,10 @@ } stmt_ty -Print(expr_ty dest, asdl_seq * values, bool nl, int lineno) +Print(expr_ty dest, asdl_seq * values, bool nl, int lineno, PyArena *arena) { stmt_ty p; - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -232,7 +234,7 @@ stmt_ty For(expr_ty target, expr_ty iter, asdl_seq * body, asdl_seq * orelse, int - lineno) + lineno, PyArena *arena) { stmt_ty p; if (!target) { @@ -245,7 +247,7 @@ "field iter is required for For"); return NULL; } - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -260,7 +262,8 @@ } stmt_ty -While(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno) +While(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno, PyArena + *arena) { stmt_ty p; if (!test) { @@ -268,7 +271,7 @@ "field test is required for While"); return NULL; } - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -282,7 +285,7 @@ } stmt_ty -If(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno) 
+If(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno, PyArena *arena) { stmt_ty p; if (!test) { @@ -290,7 +293,7 @@ "field test is required for If"); return NULL; } - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -304,10 +307,10 @@ } stmt_ty -Raise(expr_ty type, expr_ty inst, expr_ty tback, int lineno) +Raise(expr_ty type, expr_ty inst, expr_ty tback, int lineno, PyArena *arena) { stmt_ty p; - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -321,10 +324,11 @@ } stmt_ty -TryExcept(asdl_seq * body, asdl_seq * handlers, asdl_seq * orelse, int lineno) +TryExcept(asdl_seq * body, asdl_seq * handlers, asdl_seq * orelse, int lineno, + PyArena *arena) { stmt_ty p; - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -338,10 +342,10 @@ } stmt_ty -TryFinally(asdl_seq * body, asdl_seq * finalbody, int lineno) +TryFinally(asdl_seq * body, asdl_seq * finalbody, int lineno, PyArena *arena) { stmt_ty p; - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -354,7 +358,7 @@ } stmt_ty -Assert(expr_ty test, expr_ty msg, int lineno) +Assert(expr_ty test, expr_ty msg, int lineno, PyArena *arena) { stmt_ty p; if (!test) { @@ -362,7 +366,7 @@ "field test is required for Assert"); return NULL; } - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -375,10 +379,10 @@ } stmt_ty -Import(asdl_seq * names, int lineno) +Import(asdl_seq * names, int lineno, PyArena *arena) { stmt_ty p; - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -390,7 +394,7 @@ } stmt_ty -ImportFrom(identifier module, asdl_seq * names, int lineno) 
+ImportFrom(identifier module, asdl_seq * names, int lineno, PyArena *arena) { stmt_ty p; if (!module) { @@ -398,7 +402,7 @@ "field module is required for ImportFrom"); return NULL; } - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -411,7 +415,7 @@ } stmt_ty -Exec(expr_ty body, expr_ty globals, expr_ty locals, int lineno) +Exec(expr_ty body, expr_ty globals, expr_ty locals, int lineno, PyArena *arena) { stmt_ty p; if (!body) { @@ -419,7 +423,7 @@ "field body is required for Exec"); return NULL; } - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -433,10 +437,10 @@ } stmt_ty -Global(asdl_seq * names, int lineno) +Global(asdl_seq * names, int lineno, PyArena *arena) { stmt_ty p; - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -448,7 +452,7 @@ } stmt_ty -Expr(expr_ty value, int lineno) +Expr(expr_ty value, int lineno, PyArena *arena) { stmt_ty p; if (!value) { @@ -456,7 +460,7 @@ "field value is required for Expr"); return NULL; } - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -468,10 +472,10 @@ } stmt_ty -Pass(int lineno) +Pass(int lineno, PyArena *arena) { stmt_ty p; - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -482,10 +486,10 @@ } stmt_ty -Break(int lineno) +Break(int lineno, PyArena *arena) { stmt_ty p; - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -496,10 +500,10 @@ } stmt_ty -Continue(int lineno) +Continue(int lineno, PyArena *arena) { stmt_ty p; - p = (stmt_ty)malloc(sizeof(*p)); + p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -510,7 
+514,7 @@ } expr_ty -BoolOp(boolop_ty op, asdl_seq * values, int lineno) +BoolOp(boolop_ty op, asdl_seq * values, int lineno, PyArena *arena) { expr_ty p; if (!op) { @@ -518,7 +522,7 @@ "field op is required for BoolOp"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -531,7 +535,7 @@ } expr_ty -BinOp(expr_ty left, operator_ty op, expr_ty right, int lineno) +BinOp(expr_ty left, operator_ty op, expr_ty right, int lineno, PyArena *arena) { expr_ty p; if (!left) { @@ -549,7 +553,7 @@ "field right is required for BinOp"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -563,7 +567,7 @@ } expr_ty -UnaryOp(unaryop_ty op, expr_ty operand, int lineno) +UnaryOp(unaryop_ty op, expr_ty operand, int lineno, PyArena *arena) { expr_ty p; if (!op) { @@ -576,7 +580,7 @@ "field operand is required for UnaryOp"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -589,7 +593,7 @@ } expr_ty -Lambda(arguments_ty args, expr_ty body, int lineno) +Lambda(arguments_ty args, expr_ty body, int lineno, PyArena *arena) { expr_ty p; if (!args) { @@ -602,7 +606,7 @@ "field body is required for Lambda"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -615,10 +619,10 @@ } expr_ty -Dict(asdl_seq * keys, asdl_seq * values, int lineno) +Dict(asdl_seq * keys, asdl_seq * values, int lineno, PyArena *arena) { expr_ty p; - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -631,7 +635,7 @@ } expr_ty -ListComp(expr_ty elt, asdl_seq * generators, int lineno) +ListComp(expr_ty elt, asdl_seq * generators, int lineno, PyArena *arena) { expr_ty p; if (!elt) { 
@@ -639,7 +643,7 @@ "field elt is required for ListComp"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -652,7 +656,7 @@ } expr_ty -GeneratorExp(expr_ty elt, asdl_seq * generators, int lineno) +GeneratorExp(expr_ty elt, asdl_seq * generators, int lineno, PyArena *arena) { expr_ty p; if (!elt) { @@ -660,7 +664,7 @@ "field elt is required for GeneratorExp"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -673,10 +677,10 @@ } expr_ty -Yield(expr_ty value, int lineno) +Yield(expr_ty value, int lineno, PyArena *arena) { expr_ty p; - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -688,7 +692,8 @@ } expr_ty -Compare(expr_ty left, asdl_seq * ops, asdl_seq * comparators, int lineno) +Compare(expr_ty left, asdl_seq * ops, asdl_seq * comparators, int lineno, + PyArena *arena) { expr_ty p; if (!left) { @@ -696,7 +701,7 @@ "field left is required for Compare"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -711,7 +716,7 @@ expr_ty Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, expr_ty starargs, - expr_ty kwargs, int lineno) + expr_ty kwargs, int lineno, PyArena *arena) { expr_ty p; if (!func) { @@ -719,7 +724,7 @@ "field func is required for Call"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -735,7 +740,7 @@ } expr_ty -Repr(expr_ty value, int lineno) +Repr(expr_ty value, int lineno, PyArena *arena) { expr_ty p; if (!value) { @@ -743,7 +748,7 @@ "field value is required for Repr"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { 
PyErr_NoMemory(); return NULL; @@ -755,7 +760,7 @@ } expr_ty -Num(object n, int lineno) +Num(object n, int lineno, PyArena *arena) { expr_ty p; if (!n) { @@ -763,7 +768,7 @@ "field n is required for Num"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -775,7 +780,7 @@ } expr_ty -Str(string s, int lineno) +Str(string s, int lineno, PyArena *arena) { expr_ty p; if (!s) { @@ -783,7 +788,7 @@ "field s is required for Str"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -795,7 +800,8 @@ } expr_ty -Attribute(expr_ty value, identifier attr, expr_context_ty ctx, int lineno) +Attribute(expr_ty value, identifier attr, expr_context_ty ctx, int lineno, + PyArena *arena) { expr_ty p; if (!value) { @@ -813,7 +819,7 @@ "field ctx is required for Attribute"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -827,7 +833,8 @@ } expr_ty -Subscript(expr_ty value, slice_ty slice, expr_context_ty ctx, int lineno) +Subscript(expr_ty value, slice_ty slice, expr_context_ty ctx, int lineno, + PyArena *arena) { expr_ty p; if (!value) { @@ -845,7 +852,7 @@ "field ctx is required for Subscript"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -859,7 +866,7 @@ } expr_ty -Name(identifier id, expr_context_ty ctx, int lineno) +Name(identifier id, expr_context_ty ctx, int lineno, PyArena *arena) { expr_ty p; if (!id) { @@ -872,7 +879,7 @@ "field ctx is required for Name"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -885,7 +892,7 @@ } expr_ty -List(asdl_seq * elts, expr_context_ty ctx, int lineno) +List(asdl_seq * elts, 
expr_context_ty ctx, int lineno, PyArena *arena) { expr_ty p; if (!ctx) { @@ -893,7 +900,7 @@ "field ctx is required for List"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -906,7 +913,7 @@ } expr_ty -Tuple(asdl_seq * elts, expr_context_ty ctx, int lineno) +Tuple(asdl_seq * elts, expr_context_ty ctx, int lineno, PyArena *arena) { expr_ty p; if (!ctx) { @@ -914,7 +921,7 @@ "field ctx is required for Tuple"); return NULL; } - p = (expr_ty)malloc(sizeof(*p)); + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -927,10 +934,10 @@ } slice_ty -Ellipsis() +Ellipsis(PyArena *arena) { slice_ty p; - p = (slice_ty)malloc(sizeof(*p)); + p = (slice_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -940,10 +947,10 @@ } slice_ty -Slice(expr_ty lower, expr_ty upper, expr_ty step) +Slice(expr_ty lower, expr_ty upper, expr_ty step, PyArena *arena) { slice_ty p; - p = (slice_ty)malloc(sizeof(*p)); + p = (slice_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -956,10 +963,10 @@ } slice_ty -ExtSlice(asdl_seq * dims) +ExtSlice(asdl_seq * dims, PyArena *arena) { slice_ty p; - p = (slice_ty)malloc(sizeof(*p)); + p = (slice_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -970,7 +977,7 @@ } slice_ty -Index(expr_ty value) +Index(expr_ty value, PyArena *arena) { slice_ty p; if (!value) { @@ -978,7 +985,7 @@ "field value is required for Index"); return NULL; } - p = (slice_ty)malloc(sizeof(*p)); + p = (slice_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -989,7 +996,7 @@ } comprehension_ty -comprehension(expr_ty target, expr_ty iter, asdl_seq * ifs) +comprehension(expr_ty target, expr_ty iter, asdl_seq * ifs, PyArena *arena) { comprehension_ty p; if (!target) { @@ -1002,7 +1009,7 @@ "field iter is required for 
comprehension"); return NULL; } - p = (comprehension_ty)malloc(sizeof(*p)); + p = (comprehension_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -1014,10 +1021,10 @@ } excepthandler_ty -excepthandler(expr_ty type, expr_ty name, asdl_seq * body) +excepthandler(expr_ty type, expr_ty name, asdl_seq * body, PyArena *arena) { excepthandler_ty p; - p = (excepthandler_ty)malloc(sizeof(*p)); + p = (excepthandler_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -1030,10 +1037,10 @@ arguments_ty arguments(asdl_seq * args, identifier vararg, identifier kwarg, asdl_seq * - defaults) + defaults, PyArena *arena) { arguments_ty p; - p = (arguments_ty)malloc(sizeof(*p)); + p = (arguments_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -1046,7 +1053,7 @@ } keyword_ty -keyword(identifier arg, expr_ty value) +keyword(identifier arg, expr_ty value, PyArena *arena) { keyword_ty p; if (!arg) { @@ -1059,7 +1066,7 @@ "field value is required for keyword"); return NULL; } - p = (keyword_ty)malloc(sizeof(*p)); + p = (keyword_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -1070,7 +1077,7 @@ } alias_ty -alias(identifier name, identifier asname) +alias(identifier name, identifier asname, PyArena *arena) { alias_ty p; if (!name) { @@ -1078,7 +1085,7 @@ "field name is required for alias"); return NULL; } - p = (alias_ty)malloc(sizeof(*p)); + p = (alias_ty)PyArena_Malloc(arena, sizeof(*p)); if (!p) { PyErr_NoMemory(); return NULL; @@ -1089,454 +1096,6 @@ } -static void -free_seq_exprs(asdl_seq *seq) -{ - int i, n; - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_expr((expr_ty)asdl_seq_GET(seq, i)); - asdl_seq_free(seq); -} - -static void -free_seq_stmts(asdl_seq *seq) -{ - int i, n; - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_stmt((stmt_ty)asdl_seq_GET(seq, i)); - asdl_seq_free(seq); -} - - -void -free_mod(mod_ty o) -{ - if (!o) - return; - 
- switch (o->kind) { - case Module_kind: - free_seq_stmts(o->v.Module.body); - break; - case Interactive_kind: - free_seq_stmts(o->v.Interactive.body); - break; - case Expression_kind: - free_expr((expr_ty)o->v.Expression.body); - break; - case Suite_kind: - free_seq_stmts(o->v.Suite.body); - break; - } - - free(o); -} - -void -free_stmt(stmt_ty o) -{ - int i, n; - asdl_seq *seq; - - if (!o) - return; - - switch (o->kind) { - case FunctionDef_kind: - Py_DECREF((identifier)o->v.FunctionDef.name); - free_arguments((arguments_ty)o->v.FunctionDef.args); - free_seq_stmts(o->v.FunctionDef.body); - free_seq_exprs(o->v.FunctionDef.decorators); - break; - case ClassDef_kind: - Py_DECREF((identifier)o->v.ClassDef.name); - free_seq_exprs(o->v.ClassDef.bases); - free_seq_stmts(o->v.ClassDef.body); - break; - case Return_kind: - if (o->v.Return.value) { - free_expr((expr_ty)o->v.Return.value); - } - break; - case Delete_kind: - free_seq_exprs(o->v.Delete.targets); - break; - case Assign_kind: - free_seq_exprs(o->v.Assign.targets); - free_expr((expr_ty)o->v.Assign.value); - break; - case AugAssign_kind: - free_expr((expr_ty)o->v.AugAssign.target); - free_operator((operator_ty)o->v.AugAssign.op); - free_expr((expr_ty)o->v.AugAssign.value); - break; - case Print_kind: - if (o->v.Print.dest) { - free_expr((expr_ty)o->v.Print.dest); - } - free_seq_exprs(o->v.Print.values); - break; - case For_kind: - free_expr((expr_ty)o->v.For.target); - free_expr((expr_ty)o->v.For.iter); - free_seq_stmts(o->v.For.body); - free_seq_stmts(o->v.For.orelse); - break; - case While_kind: - free_expr((expr_ty)o->v.While.test); - free_seq_stmts(o->v.While.body); - free_seq_stmts(o->v.While.orelse); - break; - case If_kind: - free_expr((expr_ty)o->v.If.test); - free_seq_stmts(o->v.If.body); - free_seq_stmts(o->v.If.orelse); - break; - case Raise_kind: - if (o->v.Raise.type) { - free_expr((expr_ty)o->v.Raise.type); - } - if (o->v.Raise.inst) { - free_expr((expr_ty)o->v.Raise.inst); - } - if 
(o->v.Raise.tback) { - free_expr((expr_ty)o->v.Raise.tback); - } - break; - case TryExcept_kind: - free_seq_stmts(o->v.TryExcept.body); - seq = o->v.TryExcept.handlers; - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_excepthandler((excepthandler_ty)asdl_seq_GET(seq, - i)); - asdl_seq_free(seq); - free_seq_stmts(o->v.TryExcept.orelse); - break; - case TryFinally_kind: - free_seq_stmts(o->v.TryFinally.body); - free_seq_stmts(o->v.TryFinally.finalbody); - break; - case Assert_kind: - free_expr((expr_ty)o->v.Assert.test); - if (o->v.Assert.msg) { - free_expr((expr_ty)o->v.Assert.msg); - } - break; - case Import_kind: - seq = o->v.Import.names; - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_alias((alias_ty)asdl_seq_GET(seq, i)); - asdl_seq_free(seq); - break; - case ImportFrom_kind: - Py_DECREF((identifier)o->v.ImportFrom.module); - seq = o->v.ImportFrom.names; - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_alias((alias_ty)asdl_seq_GET(seq, i)); - asdl_seq_free(seq); - break; - case Exec_kind: - free_expr((expr_ty)o->v.Exec.body); - if (o->v.Exec.globals) { - free_expr((expr_ty)o->v.Exec.globals); - } - if (o->v.Exec.locals) { - free_expr((expr_ty)o->v.Exec.locals); - } - break; - case Global_kind: - seq = o->v.Global.names; - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - Py_DECREF((identifier)asdl_seq_GET(seq, i)); - asdl_seq_free(seq); - break; - case Expr_kind: - free_expr((expr_ty)o->v.Expr.value); - break; - case Pass_kind: - break; - case Break_kind: - break; - case Continue_kind: - break; - } - - free(o); -} - -void -free_expr(expr_ty o) -{ - int i, n; - asdl_seq *seq; - - if (!o) - return; - - switch (o->kind) { - case BoolOp_kind: - free_boolop((boolop_ty)o->v.BoolOp.op); - free_seq_exprs(o->v.BoolOp.values); - break; - case BinOp_kind: - free_expr((expr_ty)o->v.BinOp.left); - free_operator((operator_ty)o->v.BinOp.op); - free_expr((expr_ty)o->v.BinOp.right); - break; - case UnaryOp_kind: - 
free_unaryop((unaryop_ty)o->v.UnaryOp.op); - free_expr((expr_ty)o->v.UnaryOp.operand); - break; - case Lambda_kind: - free_arguments((arguments_ty)o->v.Lambda.args); - free_expr((expr_ty)o->v.Lambda.body); - break; - case Dict_kind: - free_seq_exprs(o->v.Dict.keys); - free_seq_exprs(o->v.Dict.values); - break; - case ListComp_kind: - free_expr((expr_ty)o->v.ListComp.elt); - seq = o->v.ListComp.generators; - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_comprehension((comprehension_ty)asdl_seq_GET(seq, - i)); - asdl_seq_free(seq); - break; - case GeneratorExp_kind: - free_expr((expr_ty)o->v.GeneratorExp.elt); - seq = o->v.GeneratorExp.generators; - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_comprehension((comprehension_ty)asdl_seq_GET(seq, - i)); - asdl_seq_free(seq); - break; - case Yield_kind: - if (o->v.Yield.value) { - free_expr((expr_ty)o->v.Yield.value); - } - break; - case Compare_kind: - free_expr((expr_ty)o->v.Compare.left); - seq = o->v.Compare.ops; - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_cmpop((cmpop_ty)asdl_seq_GET(seq, i)); - asdl_seq_free(seq); - free_seq_exprs(o->v.Compare.comparators); - break; - case Call_kind: - free_expr((expr_ty)o->v.Call.func); - free_seq_exprs(o->v.Call.args); - seq = o->v.Call.keywords; - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_keyword((keyword_ty)asdl_seq_GET(seq, i)); - asdl_seq_free(seq); - if (o->v.Call.starargs) { - free_expr((expr_ty)o->v.Call.starargs); - } - if (o->v.Call.kwargs) { - free_expr((expr_ty)o->v.Call.kwargs); - } - break; - case Repr_kind: - free_expr((expr_ty)o->v.Repr.value); - break; - case Num_kind: - Py_DECREF((object)o->v.Num.n); - break; - case Str_kind: - Py_DECREF((string)o->v.Str.s); - break; - case Attribute_kind: - free_expr((expr_ty)o->v.Attribute.value); - Py_DECREF((identifier)o->v.Attribute.attr); - free_expr_context((expr_context_ty)o->v.Attribute.ctx); - break; - case Subscript_kind: - free_expr((expr_ty)o->v.Subscript.value); - 
free_slice((slice_ty)o->v.Subscript.slice); - free_expr_context((expr_context_ty)o->v.Subscript.ctx); - break; - case Name_kind: - Py_DECREF((identifier)o->v.Name.id); - free_expr_context((expr_context_ty)o->v.Name.ctx); - break; - case List_kind: - free_seq_exprs(o->v.List.elts); - free_expr_context((expr_context_ty)o->v.List.ctx); - break; - case Tuple_kind: - free_seq_exprs(o->v.Tuple.elts); - free_expr_context((expr_context_ty)o->v.Tuple.ctx); - break; - } - - free(o); -} - -void -free_expr_context(expr_context_ty o) -{ - if (!o) - return; - -} - -void -free_slice(slice_ty o) -{ - int i, n; - asdl_seq *seq; - - if (!o) - return; - - switch (o->kind) { - case Ellipsis_kind: - break; - case Slice_kind: - if (o->v.Slice.lower) { - free_expr((expr_ty)o->v.Slice.lower); - } - if (o->v.Slice.upper) { - free_expr((expr_ty)o->v.Slice.upper); - } - if (o->v.Slice.step) { - free_expr((expr_ty)o->v.Slice.step); - } - break; - case ExtSlice_kind: - seq = o->v.ExtSlice.dims; - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_slice((slice_ty)asdl_seq_GET(seq, i)); - asdl_seq_free(seq); - break; - case Index_kind: - free_expr((expr_ty)o->v.Index.value); - break; - } - - free(o); -} - -void -free_boolop(boolop_ty o) -{ - if (!o) - return; - -} - -void -free_operator(operator_ty o) -{ - if (!o) - return; - -} - -void -free_unaryop(unaryop_ty o) -{ - if (!o) - return; - -} - -void -free_cmpop(cmpop_ty o) -{ - if (!o) - return; - -} - -void -free_comprehension(comprehension_ty o) -{ - if (!o) - return; - - free_expr((expr_ty)o->target); - free_expr((expr_ty)o->iter); - free_seq_exprs(o->ifs); - - free(o); -} - -void -free_excepthandler(excepthandler_ty o) -{ - if (!o) - return; - - if (o->type) { - free_expr((expr_ty)o->type); - } - if (o->name) { - free_expr((expr_ty)o->name); - } - free_seq_stmts(o->body); - - free(o); -} - -void -free_arguments(arguments_ty o) -{ - if (!o) - return; - - free_seq_exprs(o->args); - if (o->vararg) { - Py_DECREF((identifier)o->vararg); - } 
- if (o->kwarg) { - Py_DECREF((identifier)o->kwarg); - } - free_seq_exprs(o->defaults); - - free(o); -} - -void -free_keyword(keyword_ty o) -{ - if (!o) - return; - - Py_DECREF((identifier)o->arg); - free_expr((expr_ty)o->value); - - free(o); -} - -void -free_alias(alias_ty o) -{ - if (!o) - return; - - Py_DECREF((identifier)o->name); - if (o->asname) { - Py_DECREF((identifier)o->asname); - } - - free(o); -} - - #define CHECKSIZE(BUF, OFF, MIN) { \ int need = *(OFF) + MIN; \ Modified: python/branches/ssize_t/Python/asdl.c ============================================================================== --- python/branches/ssize_t/Python/asdl.c (original) +++ python/branches/ssize_t/Python/asdl.c Mon Jan 2 16:17:17 2006 @@ -2,17 +2,18 @@ #include "asdl.h" asdl_seq * -asdl_seq_new(int size) +asdl_seq_new(int size, PyArena *arena) { asdl_seq *seq = NULL; size_t n = sizeof(asdl_seq) + (size ? (sizeof(void *) * (size - 1)) : 0); - seq = (asdl_seq *)PyObject_Malloc(n); + seq = (asdl_seq *)malloc(n); if (!seq) { PyErr_NoMemory(); return NULL; } + PyArena_AddMallocPointer(arena, (void *)seq); memset(seq, 0, n); seq->size = size; return seq; @@ -21,6 +22,4 @@ void asdl_seq_free(asdl_seq *seq) { - PyObject_Free(seq); } - Modified: python/branches/ssize_t/Python/ast.c ============================================================================== --- python/branches/ssize_t/Python/ast.c (original) +++ python/branches/ssize_t/Python/ast.c Mon Jan 2 16:17:17 2006 @@ -7,6 +7,7 @@ #include "Python-ast.h" #include "grammar.h" #include "node.h" +#include "pyarena.h" #include "ast.h" #include "token.h" #include "parsetok.h" @@ -20,51 +21,10 @@ - syntax errors */ -/* - Note: - - You should rarely need to use the asdl_seq_free() in this file. - If you use asdl_seq_free(), you will leak any objects held in the seq. - If there is an appropriate asdl_*_seq_free() function, use it. 
- If there isn't an asdl_*_seq_free() function for you, you will - need to loop over the data in the sequence and free it. - - asdl_seq* seq; - int i; - - for (i = 0; i < asdl_seq_LEN(seq); i++) - free_***(asdl_seq_GET(seq, i)); - asdl_seq_free(seq); / * ok * / - - Almost all of the ast functions return a seq of expr, so you should - use asdl_expr_seq_free(). The exception is ast_for_suite() which - returns a seq of stmt's, so use asdl_stmt_seq_free() to free it. - - If asdl_seq_free is appropriate, you should mark it with an ok comment. - - There are still many memory problems in this file even though - it runs clean in valgrind, save one problem that may have existed - before the AST. - - Any code which does something like this: - - return ASTconstruct(local, LINENO(n)); - - will leak memory. The problem is if ASTconstruct (e.g., TryFinally) - cannot allocate memory, local will be leaked. - - There was discussion on python-dev to replace the entire allocation - scheme in this file with arenas. Basically rather than allocate - memory in little blocks with malloc(), we allocate one big honking - hunk and deref everything into this block. We would still need - another block or technique to handle the PyObject*s. 
- - http://mail.python.org/pipermail/python-dev/2005-November/058138.html -*/ - /* Data structure used internally */ struct compiling { - char *c_encoding; /* source encoding */ + char *c_encoding; /* source encoding */ + PyArena *c_arena; /* arena for allocating memeory */ }; static asdl_seq *seq_for_testlist(struct compiling *, const node *); @@ -86,63 +46,14 @@ #define LINENO(n) ((n)->n_lineno) #endif -#define NEW_IDENTIFIER(n) PyString_InternFromString(STR(n)) - -static void -asdl_stmt_seq_free(asdl_seq* seq) -{ - int n, i; - - if (!seq) - return; - - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_stmt(asdl_seq_GET(seq, i)); - asdl_seq_free(seq); /* ok */ -} - -static void -asdl_expr_seq_free(asdl_seq* seq) -{ - int n, i; - - if (!seq) - return; - - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_expr(asdl_seq_GET(seq, i)); - asdl_seq_free(seq); /* ok */ +static identifier +new_identifier(const char* n, PyArena *arena) { + PyObject* id = PyString_InternFromString(n); + PyArena_AddPyObject(arena, id); + return id; } -static void -asdl_alias_seq_free(asdl_seq* seq) -{ - int n, i; - - if (!seq) - return; - - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_alias(asdl_seq_GET(seq, i)); - asdl_seq_free(seq); /* ok */ -} - -static void -asdl_comprehension_seq_free(asdl_seq* seq) -{ - int n, i; - - if (!seq) - return; - - n = asdl_seq_LEN(seq); - for (i = 0; i < n; i++) - free_comprehension(asdl_seq_GET(seq, i)); - asdl_seq_free(seq); /* ok */ -} +#define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena) /* This routine provides an invalid object for the syntax error. 
The outermost routine must unpack this error and create the @@ -269,7 +180,8 @@ */ mod_ty -PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename) +PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename, + PyArena *arena) { int i, j, num; asdl_seq *stmts = NULL; @@ -278,17 +190,18 @@ struct compiling c; if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) { - c.c_encoding = "utf-8"; + c.c_encoding = "utf-8"; } else if (TYPE(n) == encoding_decl) { c.c_encoding = STR(n); n = CHILD(n, 0); } else { c.c_encoding = NULL; } + c.c_arena = arena; switch (TYPE(n)) { case file_input: - stmts = asdl_seq_new(num_stmts(n)); + stmts = asdl_seq_new(num_stmts(n), arena); if (!stmts) return NULL; for (i = 0; i < NCH(n) - 1; i++) { @@ -314,7 +227,7 @@ } } } - return Module(stmts); + return Module(stmts, arena); case eval_input: { expr_ty testlist_ast; @@ -322,20 +235,20 @@ testlist_ast = ast_for_testlist(&c, CHILD(n, 0)); if (!testlist_ast) goto error; - return Expression(testlist_ast); + return Expression(testlist_ast, arena); } case single_input: if (TYPE(CHILD(n, 0)) == NEWLINE) { - stmts = asdl_seq_new(1); + stmts = asdl_seq_new(1, arena); if (!stmts) goto error; - asdl_seq_SET(stmts, 0, Pass(n->n_lineno)); - return Interactive(stmts); + asdl_seq_SET(stmts, 0, Pass(n->n_lineno, arena)); + return Interactive(stmts, arena); } else { n = CHILD(n, 0); num = num_stmts(n); - stmts = asdl_seq_new(num); + stmts = asdl_seq_new(num, arena); if (!stmts) goto error; if (num == 1) { @@ -358,14 +271,12 @@ } } - return Interactive(stmts); + return Interactive(stmts, arena); } default: goto error; } error: - if (stmts) - asdl_stmt_seq_free(stmts); ast_error_finish(filename); return NULL; } @@ -589,7 +500,7 @@ || TYPE(n) == testlist_safe ); - seq = asdl_seq_new((NCH(n) + 1) / 2); + seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); if (!seq) return NULL; @@ -597,10 +508,8 @@ REQ(CHILD(n, i), test); expression = ast_for_expr(c, CHILD(n, i)); - if (!expression) 
{ - asdl_expr_seq_free(seq); + if (!expression) return NULL; - } assert(i / 2 < seq->size); asdl_seq_SET(seq, i / 2, expression); @@ -609,11 +518,11 @@ } static expr_ty -compiler_complex_args(const node *n) +compiler_complex_args(struct compiling *c, const node *n) { int i, len = (NCH(n) + 1) / 2; expr_ty result; - asdl_seq *args = asdl_seq_new(len); + asdl_seq *args = asdl_seq_new(len, c->c_arena); if (!args) return NULL; @@ -627,15 +536,16 @@ ast_error(child, "assignment to None"); return NULL; } - arg = Name(NEW_IDENTIFIER(child), Store, LINENO(child)); + arg = Name(NEW_IDENTIFIER(child), Store, LINENO(child), + c->c_arena); } else - arg = compiler_complex_args(CHILD(CHILD(n, 2*i), 1)); + arg = compiler_complex_args(c, CHILD(CHILD(n, 2*i), 1)); set_context(arg, Store, n); asdl_seq_SET(args, i, arg); } - result = Tuple(args, Store, LINENO(n)); + result = Tuple(args, Store, LINENO(n), c->c_arena); set_context(result, Store, n); return result; } @@ -660,7 +570,7 @@ if (TYPE(n) == parameters) { if (NCH(n) == 2) /* () as argument list */ - return arguments(NULL, NULL, NULL, NULL); + return arguments(NULL, NULL, NULL, NULL, c->c_arena); n = CHILD(n, 1); } REQ(n, varargslist); @@ -668,16 +578,15 @@ /* first count the number of normal args & defaults */ for (i = 0; i < NCH(n); i++) { ch = CHILD(n, i); - if (TYPE(ch) == fpdef) { + if (TYPE(ch) == fpdef) n_args++; - } if (TYPE(ch) == EQUAL) n_defaults++; } - args = (n_args ? asdl_seq_new(n_args) : NULL); + args = (n_args ? asdl_seq_new(n_args, c->c_arena) : NULL); if (!args && n_args) return NULL; /* Don't need to go to NULL; nothing allocated */ - defaults = (n_defaults ? asdl_seq_new(n_defaults) : NULL); + defaults = (n_defaults ? 
asdl_seq_new(n_defaults, c->c_arena) : NULL); if (!defaults && n_defaults) goto error; @@ -706,7 +615,7 @@ if (NCH(ch) == 3) { asdl_seq_APPEND(args, - compiler_complex_args(CHILD(ch, 1))); + compiler_complex_args(c, CHILD(ch, 1))); } else if (TYPE(CHILD(ch, 0)) == NAME) { expr_ty name; @@ -715,7 +624,7 @@ goto error; } name = Name(NEW_IDENTIFIER(CHILD(ch, 0)), - Param, LINENO(ch)); + Param, LINENO(ch), c->c_arena); if (!name) goto error; asdl_seq_APPEND(args, name); @@ -747,53 +656,40 @@ } } - return arguments(args, vararg, kwarg, defaults); + return arguments(args, vararg, kwarg, defaults, c->c_arena); error: Py_XDECREF(vararg); Py_XDECREF(kwarg); - if (args) - asdl_expr_seq_free(args); - if (defaults) - asdl_expr_seq_free(defaults); return NULL; } static expr_ty ast_for_dotted_name(struct compiling *c, const node *n) { - expr_ty e = NULL; - expr_ty attrib = NULL; - identifier id = NULL; + expr_ty e; + identifier id; int i; REQ(n, dotted_name); id = NEW_IDENTIFIER(CHILD(n, 0)); if (!id) - goto error; - e = Name(id, Load, LINENO(n)); + return NULL; + e = Name(id, Load, LINENO(n), c->c_arena); if (!e) - goto error; - id = NULL; + return NULL; for (i = 2; i < NCH(n); i+=2) { id = NEW_IDENTIFIER(CHILD(n, i)); if (!id) - goto error; - attrib = Attribute(e, id, Load, LINENO(CHILD(n, i))); - if (!attrib) - goto error; - e = attrib; - attrib = NULL; + return NULL; + e = Attribute(e, id, Load, LINENO(CHILD(n, i)), c->c_arena); + if (!e) + return NULL; } return e; - - error: - Py_XDECREF(id); - free_expr(e); - return NULL; } static expr_ty @@ -801,77 +697,69 @@ { /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */ expr_ty d = NULL; - expr_ty name_expr = NULL; + expr_ty name_expr; REQ(n, decorator); if ((NCH(n) < 3 && NCH(n) != 5 && NCH(n) != 6) || TYPE(CHILD(n, 0)) != AT || TYPE(RCHILD(n, -1)) != NEWLINE) { ast_error(n, "Invalid decorator node"); - goto error; + return NULL; } name_expr = ast_for_dotted_name(c, CHILD(n, 1)); if (!name_expr) - goto error; + return 
NULL; if (NCH(n) == 3) { /* No arguments */ d = name_expr; name_expr = NULL; } else if (NCH(n) == 5) { /* Call with no arguments */ - d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n)); + d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n), c->c_arena); if (!d) - goto error; + return NULL; name_expr = NULL; } else { d = ast_for_call(c, CHILD(n, 3), name_expr); if (!d) - goto error; + return NULL; name_expr = NULL; } return d; - - error: - free_expr(name_expr); - free_expr(d); - return NULL; } static asdl_seq* ast_for_decorators(struct compiling *c, const node *n) { - asdl_seq* decorator_seq = NULL; + asdl_seq* decorator_seq; expr_ty d; int i; REQ(n, decorators); - decorator_seq = asdl_seq_new(NCH(n)); + decorator_seq = asdl_seq_new(NCH(n), c->c_arena); if (!decorator_seq) return NULL; for (i = 0; i < NCH(n); i++) { d = ast_for_decorator(c, CHILD(n, i)); if (!d) - goto error; + return NULL; asdl_seq_APPEND(decorator_seq, d); } return decorator_seq; - error: - asdl_expr_seq_free(decorator_seq); - return NULL; } static stmt_ty ast_for_funcdef(struct compiling *c, const node *n) { /* funcdef: 'def' [decorators] NAME parameters ':' suite */ - identifier name = NULL; - arguments_ty args = NULL; - asdl_seq *body = NULL; + identifier name; + arguments_ty args; + asdl_seq *body; asdl_seq *decorator_seq = NULL; int name_i; @@ -880,7 +768,7 @@ if (NCH(n) == 6) { /* decorators are present */ decorator_seq = ast_for_decorators(c, CHILD(n, 0)); if (!decorator_seq) - goto error; + return NULL; name_i = 2; } else { @@ -889,26 +777,19 @@ name = NEW_IDENTIFIER(CHILD(n, name_i)); if (!name) - goto error; + return NULL; else if (!strcmp(STR(CHILD(n, name_i)), "None")) { ast_error(CHILD(n, name_i), "assignment to None"); - goto error; + return NULL; } args = ast_for_arguments(c, CHILD(n, name_i + 1)); if (!args) - goto error; + return NULL; body = ast_for_suite(c, CHILD(n, name_i + 3)); if (!body) - goto error; - - return FunctionDef(name, args, body, decorator_seq, LINENO(n)); + 
return NULL; -error: - asdl_stmt_seq_free(body); - asdl_expr_seq_free(decorator_seq); - free_arguments(args); - Py_XDECREF(name); - return NULL; + return FunctionDef(name, args, body, decorator_seq, LINENO(n), c->c_arena); } static expr_ty @@ -919,27 +800,23 @@ expr_ty expression; if (NCH(n) == 3) { - args = arguments(NULL, NULL, NULL, NULL); + args = arguments(NULL, NULL, NULL, NULL, c->c_arena); if (!args) return NULL; expression = ast_for_expr(c, CHILD(n, 2)); - if (!expression) { - free_arguments(args); + if (!expression) return NULL; - } } else { args = ast_for_arguments(c, CHILD(n, 1)); if (!args) return NULL; expression = ast_for_expr(c, CHILD(n, 3)); - if (!expression) { - free_arguments(args); + if (!expression) return NULL; - } } - return Lambda(args, expression, LINENO(n)); + return Lambda(args, expression, LINENO(n), c->c_arena); } /* Count the number of 'for' loop in a list comprehension. @@ -973,11 +850,10 @@ else return n_fors; } - else { - /* Should never be reached */ - PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors"); - return -1; - } + + /* Should never be reached */ + PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors"); + return -1; } /* Count the number of 'if' statements in a list comprehension. 
@@ -1028,12 +904,10 @@ if (n_fors == -1) return NULL; - listcomps = asdl_seq_new(n_fors); - if (!listcomps) { - free_expr(elt); + listcomps = asdl_seq_new(n_fors, c->c_arena); + if (!listcomps) return NULL; - } - + ch = CHILD(n, 1); for (i = 0; i < n_fors; i++) { comprehension_ty lc; @@ -1043,35 +917,20 @@ REQ(ch, list_for); t = ast_for_exprlist(c, CHILD(ch, 1), Store); - if (!t) { - asdl_comprehension_seq_free(listcomps); - free_expr(elt); + if (!t) return NULL; - } expression = ast_for_testlist(c, CHILD(ch, 3)); - if (!expression) { - asdl_expr_seq_free(t); - asdl_comprehension_seq_free(listcomps); - free_expr(elt); + if (!expression) return NULL; - } - if (asdl_seq_LEN(t) == 1) { - lc = comprehension(asdl_seq_GET(t, 0), expression, NULL); - /* only free the sequence since we grabbed element 0 above */ - if (lc) - asdl_seq_free(t); /* ok */ - } + if (asdl_seq_LEN(t) == 1) + lc = comprehension(asdl_seq_GET(t, 0), expression, NULL, + c->c_arena); else - lc = comprehension(Tuple(t, Store, LINENO(ch)), expression, NULL); - - if (!lc) { - asdl_expr_seq_free(t); - asdl_comprehension_seq_free(listcomps); - free_expr(expression); - free_expr(elt); + lc = comprehension(Tuple(t, Store, LINENO(ch), c->c_arena), + expression, NULL, c->c_arena); + if (!lc) return NULL; - } if (NCH(ch) == 5) { int j, n_ifs; @@ -1079,20 +938,12 @@ ch = CHILD(ch, 4); n_ifs = count_list_ifs(ch); - if (n_ifs == -1) { - free_comprehension(lc); - asdl_comprehension_seq_free(listcomps); - free_expr(elt); + if (n_ifs == -1) return NULL; - } - ifs = asdl_seq_new(n_ifs); - if (!ifs) { - free_comprehension(lc); - asdl_comprehension_seq_free(listcomps); - free_expr(elt); + ifs = asdl_seq_new(n_ifs, c->c_arena); + if (!ifs) return NULL; - } for (j = 0; j < n_ifs; j++) { REQ(ch, list_iter); @@ -1112,7 +963,7 @@ asdl_seq_APPEND(listcomps, lc); } - return ListComp(elt, listcomps, LINENO(n)); + return ListComp(elt, listcomps, LINENO(n), c->c_arena); } /* @@ -1147,12 +998,11 @@ else return n_fors; } - else { - 
/* Should never be reached */ - PyErr_SetString(PyExc_SystemError, - "logic error in count_gen_fors"); - return -1; - } + + /* Should never be reached */ + PyErr_SetString(PyExc_SystemError, + "logic error in count_gen_fors"); + return -1; } /* Count the number of 'if' statements in a generator expression. @@ -1198,13 +1048,11 @@ n_fors = count_gen_fors(n); if (n_fors == -1) return NULL; - - genexps = asdl_seq_new(n_fors); - if (!genexps) { - free_expr(elt); + + genexps = asdl_seq_new(n_fors, c->c_arena); + if (!genexps) return NULL; - } - + ch = CHILD(n, 1); for (i = 0; i < n_fors; i++) { comprehension_ty ge; @@ -1214,59 +1062,35 @@ REQ(ch, gen_for); t = ast_for_exprlist(c, CHILD(ch, 1), Store); - if (!t) { - asdl_comprehension_seq_free(genexps); - asdl_expr_seq_free(t); - free_expr(elt); + if (!t) return NULL; - } expression = ast_for_expr(c, CHILD(ch, 3)); - if (!expression) { - asdl_comprehension_seq_free(genexps); - asdl_expr_seq_free(t); - free_expr(elt); + if (!expression) return NULL; - } - - if (asdl_seq_LEN(t) == 1) { + + if (asdl_seq_LEN(t) == 1) ge = comprehension(asdl_seq_GET(t, 0), expression, - NULL); - /* only free the sequence since we grabbed element 0 above */ - if (ge) - asdl_seq_free(t); /* ok */ - } + NULL, c->c_arena); else - ge = comprehension(Tuple(t, Store, LINENO(ch)), - expression, NULL); - - if (!ge) { - asdl_comprehension_seq_free(genexps); - asdl_expr_seq_free(t); - free_expr(elt); + ge = comprehension(Tuple(t, Store, LINENO(ch), c->c_arena), + expression, NULL, c->c_arena); + + if (!ge) return NULL; - } - + if (NCH(ch) == 5) { int j, n_ifs; asdl_seq *ifs; ch = CHILD(ch, 4); n_ifs = count_gen_ifs(ch); - if (n_ifs == -1) { - asdl_comprehension_seq_free(genexps); - free_comprehension(ge); - free_expr(elt); + if (n_ifs == -1) return NULL; - } - - ifs = asdl_seq_new(n_ifs); - if (!ifs) { - asdl_comprehension_seq_free(genexps); - free_comprehension(ge); - free_expr(elt); + + ifs = asdl_seq_new(n_ifs, c->c_arena); + if (!ifs) return NULL; - 
} - + for (j = 0; j < n_ifs; j++) { expr_ty expression; REQ(ch, gen_iter); @@ -1274,13 +1098,8 @@ REQ(ch, gen_if); expression = ast_for_expr(c, CHILD(ch, 1)); - if (!expression) { - asdl_expr_seq_free(ifs); - asdl_comprehension_seq_free(genexps); - free_comprehension(ge); - free_expr(elt); + if (!expression) return NULL; - } asdl_seq_APPEND(ifs, expression); if (NCH(ch) == 3) ch = CHILD(ch, 2); @@ -1293,7 +1112,7 @@ asdl_seq_APPEND(genexps, ge); } - return GeneratorExp(elt, genexps, LINENO(n)); + return GeneratorExp(elt, genexps, LINENO(n), c->c_arena); } static expr_ty @@ -1308,28 +1127,28 @@ case NAME: /* All names start in Load context, but may later be changed. */ - return Name(NEW_IDENTIFIER(ch), Load, LINENO(n)); + return Name(NEW_IDENTIFIER(ch), Load, LINENO(n), c->c_arena); case STRING: { PyObject *str = parsestrplus(c, n); - if (!str) return NULL; - - return Str(str, LINENO(n)); + + PyArena_AddPyObject(c->c_arena, str); + return Str(str, LINENO(n), c->c_arena); } case NUMBER: { PyObject *pynum = parsenumber(STR(ch)); - if (!pynum) return NULL; - - return Num(pynum, LINENO(n)); + + PyArena_AddPyObject(c->c_arena, pynum); + return Num(pynum, LINENO(n), c->c_arena); } case LPAR: /* some parenthesized expressions */ ch = CHILD(n, 1); if (TYPE(ch) == RPAR) - return Tuple(NULL, Load, LINENO(n)); + return Tuple(NULL, Load, LINENO(n), c->c_arena); if (TYPE(ch) == yield_expr) return ast_for_expr(c, ch); @@ -1342,16 +1161,15 @@ ch = CHILD(n, 1); if (TYPE(ch) == RSQB) - return List(NULL, Load, LINENO(n)); + return List(NULL, Load, LINENO(n), c->c_arena); REQ(ch, listmaker); if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) { asdl_seq *elts = seq_for_testlist(c, ch); - if (!elts) return NULL; - - return List(elts, Load, LINENO(n)); + + return List(elts, Load, LINENO(n), c->c_arena); } else return ast_for_listcomp(c, ch); @@ -1362,46 +1180,37 @@ ch = CHILD(n, 1); size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */ - keys = asdl_seq_new(size); + keys = 
asdl_seq_new(size, c->c_arena); if (!keys) return NULL; - values = asdl_seq_new(size); - if (!values) { - asdl_seq_free(keys); /* ok */ + values = asdl_seq_new(size, c->c_arena); + if (!values) return NULL; - } for (i = 0; i < NCH(ch); i += 4) { expr_ty expression; expression = ast_for_expr(c, CHILD(ch, i)); - if (!expression) { - asdl_expr_seq_free(keys); - asdl_expr_seq_free(values); + if (!expression) return NULL; - } - + asdl_seq_SET(keys, i / 4, expression); - + expression = ast_for_expr(c, CHILD(ch, i + 2)); - if (!expression) { - asdl_expr_seq_free(keys); - asdl_expr_seq_free(values); + if (!expression) return NULL; - } asdl_seq_SET(values, i / 4, expression); } - return Dict(keys, values, LINENO(n)); + return Dict(keys, values, LINENO(n), c->c_arena); } case BACKQUOTE: { /* repr */ expr_ty expression = ast_for_testlist(c, CHILD(n, 1)); - if (!expression) return NULL; - - return Repr(expression, LINENO(n)); + + return Repr(expression, LINENO(n), c->c_arena); } default: PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch)); @@ -1423,7 +1232,7 @@ */ ch = CHILD(n, 0); if (TYPE(ch) == DOT) - return Ellipsis(); + return Ellipsis(c->c_arena); if (NCH(n) == 1 && TYPE(ch) == test) { /* 'step' variable hold no significance in terms of being used over @@ -1432,7 +1241,7 @@ if (!step) return NULL; - return Index(step); + return Index(step, c->c_arena); } if (TYPE(ch) == test) { @@ -1477,7 +1286,7 @@ } } - return Slice(lower, upper, step); + return Slice(lower, upper, step, c->c_arena); } static expr_ty @@ -1504,7 +1313,7 @@ if (!operator) return NULL; - result = BinOp(expr1, operator, expr2, LINENO(n)); + result = BinOp(expr1, operator, expr2, LINENO(n), c->c_arena); if (!result) return NULL; @@ -1522,7 +1331,7 @@ return NULL; tmp_result = BinOp(result, operator, tmp, - LINENO(next_oper)); + LINENO(next_oper), c->c_arena); if (!tmp) return NULL; result = tmp_result; @@ -1538,7 +1347,7 @@ REQ(n, trailer); if (TYPE(CHILD(n, 0)) == LPAR) { if (NCH(n) == 2) - e = 
Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n)); + e = Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n), c->c_arena); else e = ast_for_call(c, CHILD(n, 1), left_expr); } @@ -1549,40 +1358,32 @@ slice_ty slc = ast_for_slice(c, CHILD(n, 0)); if (!slc) return NULL; - e = Subscript(left_expr, slc, Load, LINENO(n)); - if (!e) { - free_slice(slc); + e = Subscript(left_expr, slc, Load, LINENO(n), c->c_arena); + if (!e) return NULL; - } } else { int j; slice_ty slc; - asdl_seq *slices = asdl_seq_new((NCH(n) + 1) / 2); + asdl_seq *slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); if (!slices) return NULL; for (j = 0; j < NCH(n); j += 2) { slc = ast_for_slice(c, CHILD(n, j)); - if (!slc) { - for (j = j / 2; j >= 0; j--) - free_slice(asdl_seq_GET(slices, j)); - asdl_seq_free(slices); /* ok */ + if (!slc) return NULL; - } asdl_seq_SET(slices, j / 2, slc); } - e = Subscript(left_expr, ExtSlice(slices), Load, LINENO(n)); - if (!e) { - for (j = 0; j < asdl_seq_LEN(slices); j++) - free_slice(asdl_seq_GET(slices, j)); - asdl_seq_free(slices); /* ok */ + e = Subscript(left_expr, ExtSlice(slices, c->c_arena), + Load, LINENO(n), c->c_arena); + if (!e) return NULL; - } } } else { assert(TYPE(CHILD(n, 0)) == DOT); - e = Attribute(left_expr, NEW_IDENTIFIER(CHILD(n, 1)), Load, LINENO(n)); + e = Attribute(left_expr, NEW_IDENTIFIER(CHILD(n, 1)), Load, LINENO(n), + c->c_arena); } return e; } @@ -1605,24 +1406,17 @@ if (TYPE(ch) != trailer) break; tmp = ast_for_trailer(c, ch, e); - if (!tmp) { - free_expr(e); + if (!tmp) return NULL; - } e = tmp; } if (TYPE(CHILD(n, NCH(n) - 1)) == factor) { expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1)); - if (!f) { - free_expr(e); + if (!f) return NULL; - } - tmp = BinOp(e, Pow, f, LINENO(n)); - if (!tmp) { - free_expr(f); - free_expr(e); + tmp = BinOp(e, Pow, f, LINENO(n), c->c_arena); + if (!tmp) return NULL; - } e = tmp; } return e; @@ -1663,7 +1457,7 @@ n = CHILD(n, 0); goto loop; } - seq = asdl_seq_new((NCH(n) + 1) / 2); + seq = 
asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); if (!seq) return NULL; for (i = 0; i < NCH(n); i += 2) { @@ -1673,12 +1467,9 @@ asdl_seq_SET(seq, i / 2, e); } if (!strcmp(STR(CHILD(n, 1)), "and")) - return BoolOp(And, seq, LINENO(n)); - else { - assert(!strcmp(STR(CHILD(n, 1)), "or")); - return BoolOp(Or, seq, LINENO(n)); - } - break; + return BoolOp(And, seq, LINENO(n), c->c_arena); + assert(!strcmp(STR(CHILD(n, 1)), "or")); + return BoolOp(Or, seq, LINENO(n), c->c_arena); case not_test: if (NCH(n) == 1) { n = CHILD(n, 0); @@ -1689,7 +1480,7 @@ if (!expression) return NULL; - return UnaryOp(Not, expression, LINENO(n)); + return UnaryOp(Not, expression, LINENO(n), c->c_arena); } case comparison: if (NCH(n) == 1) { @@ -1699,12 +1490,11 @@ else { expr_ty expression; asdl_seq *ops, *cmps; - ops = asdl_seq_new(NCH(n) / 2); + ops = asdl_seq_new(NCH(n) / 2, c->c_arena); if (!ops) return NULL; - cmps = asdl_seq_new(NCH(n) / 2); + cmps = asdl_seq_new(NCH(n) / 2, c->c_arena); if (!cmps) { - asdl_seq_free(ops); /* ok */ return NULL; } for (i = 1; i < NCH(n); i += 2) { @@ -1713,15 +1503,11 @@ operator = ast_for_comp_op(CHILD(n, i)); if (!operator) { - asdl_expr_seq_free(ops); - asdl_expr_seq_free(cmps); return NULL; } expression = ast_for_expr(c, CHILD(n, i + 1)); if (!expression) { - asdl_expr_seq_free(ops); - asdl_expr_seq_free(cmps); return NULL; } @@ -1730,12 +1516,10 @@ } expression = ast_for_expr(c, CHILD(n, 0)); if (!expression) { - asdl_expr_seq_free(ops); - asdl_expr_seq_free(cmps); return NULL; } - return Compare(expression, ops, cmps, LINENO(n)); + return Compare(expression, ops, cmps, LINENO(n), c->c_arena); } break; @@ -1761,7 +1545,7 @@ if (!exp) return NULL; } - return Yield(exp, LINENO(n)); + return Yield(exp, LINENO(n), c->c_arena); } case factor: { expr_ty expression; @@ -1777,11 +1561,11 @@ switch (TYPE(CHILD(n, 0))) { case PLUS: - return UnaryOp(UAdd, expression, LINENO(n)); + return UnaryOp(UAdd, expression, LINENO(n), c->c_arena); case MINUS: - return 
UnaryOp(USub, expression, LINENO(n)); + return UnaryOp(USub, expression, LINENO(n), c->c_arena); case TILDE: - return UnaryOp(Invert, expression, LINENO(n)); + return UnaryOp(Invert, expression, LINENO(n), c->c_arena); } PyErr_Format(PyExc_SystemError, "unhandled factor: %d", TYPE(CHILD(n, 0))); @@ -1793,7 +1577,7 @@ PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n)); return NULL; } - /* should never get here */ + /* should never get here unless if error is set */ return NULL; } @@ -1807,8 +1591,8 @@ */ int i, nargs, nkeywords, ngens; - asdl_seq *args = NULL; - asdl_seq *keywords = NULL; + asdl_seq *args; + asdl_seq *keywords; expr_ty vararg = NULL, kwarg = NULL; REQ(n, arglist); @@ -1838,12 +1622,12 @@ return NULL; } - args = asdl_seq_new(nargs + ngens); + args = asdl_seq_new(nargs + ngens, c->c_arena); if (!args) - goto error; - keywords = asdl_seq_new(nkeywords); + return NULL; + keywords = asdl_seq_new(nkeywords, c->c_arena); if (!keywords) - goto error; + return NULL; nargs = 0; nkeywords = 0; for (i = 0; i < NCH(n); i++) { @@ -1853,13 +1637,13 @@ if (NCH(ch) == 1) { e = ast_for_expr(c, CHILD(ch, 0)); if (!e) - goto error; + return NULL; asdl_seq_SET(args, nargs++, e); } else if (TYPE(CHILD(ch, 1)) == gen_for) { e = ast_for_genexp(c, ch); if (!e) - goto error; + return NULL; asdl_seq_SET(args, nargs++, e); } else { @@ -1869,7 +1653,7 @@ /* CHILD(ch, 0) is test, but must be an identifier? */ e = ast_for_expr(c, CHILD(ch, 0)); if (!e) - goto error; + return NULL; /* f(lambda x: x[0] = 3) ends up getting parsed with * LHS test = lambda x: x[0], and RHS test = 3. * SF bug 132313 points out that complaining about a keyword @@ -1877,19 +1661,18 @@ */ if (e->kind == Lambda_kind) { ast_error(CHILD(ch, 0), "lambda cannot contain assignment"); - goto error; + return NULL; } else if (e->kind != Name_kind) { ast_error(CHILD(ch, 0), "keyword can't be an expression"); - goto error; + return NULL; } key = e->v.Name.id; - free(e); /* XXX: is free correct here? 
*/ e = ast_for_expr(c, CHILD(ch, 2)); if (!e) - goto error; - kw = keyword(key, e); + return NULL; + kw = keyword(key, e, c->c_arena); if (!kw) - goto error; + return NULL; asdl_seq_SET(keywords, nkeywords++, kw); } } @@ -1903,19 +1686,7 @@ } } - return Call(func, args, keywords, vararg, kwarg, LINENO(n)); - - error: - free_expr(vararg); - free_expr(kwarg); - if (args) - asdl_expr_seq_free(args); - if (keywords) { - for (i = 0; i < asdl_seq_LEN(keywords); i++) - free_keyword(asdl_seq_GET(keywords, i)); - asdl_seq_free(keywords); /* ok */ - } - return NULL; + return Call(func, args, keywords, vararg, kwarg, LINENO(n), c->c_arena); } static expr_ty @@ -1941,7 +1712,7 @@ asdl_seq *tmp = seq_for_testlist(c, n); if (!tmp) return NULL; - return Tuple(tmp, Load, LINENO(n)); + return Tuple(tmp, Load, LINENO(n), c->c_arena); } } @@ -1951,11 +1722,9 @@ /* testlist_gexp: test ( gen_for | (',' test)* [','] ) */ /* argument: test [ gen_for ] */ assert(TYPE(n) == testlist_gexp || TYPE(n) == argument); - if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == gen_for) { + if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == gen_for) return ast_for_genexp(c, n); - } - else - return ast_for_testlist(c, n); + return ast_for_testlist(c, n); } /* like ast_for_testlist() but returns a sequence */ @@ -1967,20 +1736,17 @@ REQ(n, testlist); if (NCH(n) == 1) { expr_ty base; - asdl_seq *bases = asdl_seq_new(1); + asdl_seq *bases = asdl_seq_new(1, c->c_arena); if (!bases) return NULL; base = ast_for_expr(c, CHILD(n, 0)); - if (!base) { - asdl_seq_free(bases); /* ok */ + if (!base) return NULL; - } asdl_seq_SET(bases, 0, base); return bases; } - else { - return seq_for_testlist(c, n); - } + + return seq_for_testlist(c, n); } static stmt_ty @@ -2000,7 +1766,7 @@ if (!e) return NULL; - return Expr(e, LINENO(n)); + return Expr(e, LINENO(n), c->c_arena); } else if (TYPE(CHILD(n, 1)) == augassign) { expr_ty expr1, expr2; @@ -2010,12 +1776,12 @@ if (TYPE(ch) == testlist) expr1 = ast_for_testlist(c, ch); else - expr1 = 
Yield(ast_for_expr(c, CHILD(ch, 0)), LINENO(ch)); + expr1 = Yield(ast_for_expr(c, CHILD(ch, 0)), LINENO(ch), + c->c_arena); if (!expr1) return NULL; if (expr1->kind == GeneratorExp_kind) { - free_expr(expr1); ast_error(ch, "augmented assignment to generator " "expression not possible"); return NULL; @@ -2023,7 +1789,6 @@ if (expr1->kind == Name_kind) { char *var_name = PyString_AS_STRING(expr1->v.Name.id); if (var_name[0] == 'N' && !strcmp(var_name, "None")) { - free_expr(expr1); ast_error(ch, "assignment to None"); return NULL; } @@ -2033,20 +1798,15 @@ if (TYPE(ch) == testlist) expr2 = ast_for_testlist(c, ch); else - expr2 = Yield(ast_for_expr(c, ch), LINENO(ch)); - if (!expr2) { - free_expr(expr1); + expr2 = Yield(ast_for_expr(c, ch), LINENO(ch), c->c_arena); + if (!expr2) return NULL; - } operator = ast_for_augassign(CHILD(n, 1)); - if (!operator) { - free_expr(expr1); - free_expr(expr2); + if (!operator) return NULL; - } - return AugAssign(expr1, operator, expr2, LINENO(n)); + return AugAssign(expr1, operator, expr2, LINENO(n), c->c_arena); } else { int i; @@ -2056,7 +1816,7 @@ /* a normal assignment */ REQ(CHILD(n, 1), EQUAL); - targets = asdl_seq_new(NCH(n) / 2); + targets = asdl_seq_new(NCH(n) / 2, c->c_arena); if (!targets) return NULL; for (i = 0; i < NCH(n) - 2; i += 2) { @@ -2064,18 +1824,16 @@ node *ch = CHILD(n, i); if (TYPE(ch) == yield_expr) { ast_error(ch, "assignment to yield expression not possible"); - goto error; + return NULL; } e = ast_for_testlist(c, ch); /* set context to assign */ if (!e) - goto error; + return NULL; - if (!set_context(e, Store, CHILD(n, i))) { - free_expr(e); - goto error; - } + if (!set_context(e, Store, CHILD(n, i))) + return NULL; asdl_seq_SET(targets, i / 2, e); } @@ -2085,12 +1843,9 @@ else expression = ast_for_expr(c, value); if (!expression) - goto error; - return Assign(targets, expression, LINENO(n)); - error: - asdl_expr_seq_free(targets); + return NULL; + return Assign(targets, expression, LINENO(n), 
c->c_arena); } - return NULL; } static stmt_ty @@ -2111,21 +1866,18 @@ return NULL; start = 4; } - seq = asdl_seq_new((NCH(n) + 1 - start) / 2); + seq = asdl_seq_new((NCH(n) + 1 - start) / 2, c->c_arena); if (!seq) return NULL; for (i = start; i < NCH(n); i += 2) { expression = ast_for_expr(c, CHILD(n, i)); - if (!expression) { - free_expr(dest); - asdl_expr_seq_free(seq); + if (!expression) return NULL; - } asdl_seq_APPEND(seq, expression); } nl = (TYPE(CHILD(n, NCH(n) - 1)) == COMMA) ? false : true; - return Print(dest, seq, nl, LINENO(n)); + return Print(dest, seq, nl, LINENO(n), c->c_arena); } static asdl_seq * @@ -2137,24 +1889,18 @@ REQ(n, exprlist); - seq = asdl_seq_new((NCH(n) + 1) / 2); + seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); if (!seq) return NULL; for (i = 0; i < NCH(n); i += 2) { e = ast_for_expr(c, CHILD(n, i)); if (!e) - goto error; + return NULL; asdl_seq_SET(seq, i / 2, e); - if (context) { - if (!set_context(e, context, CHILD(n, i))) - goto error; - } + if (context && !set_context(e, context, CHILD(n, i))) + return NULL; } return seq; - -error: - asdl_expr_seq_free(seq); - return NULL; } static stmt_ty @@ -2168,7 +1914,7 @@ expr_list = ast_for_exprlist(c, CHILD(n, 1), Del); if (!expr_list) return NULL; - return Delete(expr_list, LINENO(n)); + return Delete(expr_list, LINENO(n), c->c_arena); } static stmt_ty @@ -2190,32 +1936,32 @@ ch = CHILD(n, 0); switch (TYPE(ch)) { case break_stmt: - return Break(LINENO(n)); + return Break(LINENO(n), c->c_arena); case continue_stmt: - return Continue(LINENO(n)); + return Continue(LINENO(n), c->c_arena); case yield_stmt: { /* will reduce to yield_expr */ expr_ty exp = ast_for_expr(c, CHILD(ch, 0)); if (!exp) return NULL; - return Expr(exp, LINENO(n)); + return Expr(exp, LINENO(n), c->c_arena); } case return_stmt: if (NCH(ch) == 1) - return Return(NULL, LINENO(n)); + return Return(NULL, LINENO(n), c->c_arena); else { expr_ty expression = ast_for_testlist(c, CHILD(ch, 1)); if (!expression) return NULL; - 
return Return(expression, LINENO(n)); + return Return(expression, LINENO(n), c->c_arena); } case raise_stmt: if (NCH(ch) == 1) - return Raise(NULL, NULL, NULL, LINENO(n)); + return Raise(NULL, NULL, NULL, LINENO(n), c->c_arena); else if (NCH(ch) == 2) { expr_ty expression = ast_for_expr(c, CHILD(ch, 1)); if (!expression) return NULL; - return Raise(expression, NULL, NULL, LINENO(n)); + return Raise(expression, NULL, NULL, LINENO(n), c->c_arena); } else if (NCH(ch) == 4) { expr_ty expr1, expr2; @@ -2227,7 +1973,7 @@ if (!expr2) return NULL; - return Raise(expr1, expr2, NULL, LINENO(n)); + return Raise(expr1, expr2, NULL, LINENO(n), c->c_arena); } else if (NCH(ch) == 6) { expr_ty expr1, expr2, expr3; @@ -2242,40 +1988,40 @@ if (!expr3) return NULL; - return Raise(expr1, expr2, expr3, LINENO(n)); + return Raise(expr1, expr2, expr3, LINENO(n), c->c_arena); } default: PyErr_Format(PyExc_SystemError, "unexpected flow_stmt: %d", TYPE(ch)); return NULL; } + + PyErr_SetString(PyExc_SystemError, "unhandled flow statement"); + return NULL; } static alias_ty -alias_for_import_name(const node *n) +alias_for_import_name(struct compiling *c, const node *n) { /* import_as_name: NAME [NAME NAME] dotted_as_name: dotted_name [NAME NAME] dotted_name: NAME ('.' NAME)* */ + PyObject *str; + loop: switch (TYPE(n)) { case import_as_name: - if (NCH(n) == 3) - return alias(NEW_IDENTIFIER(CHILD(n, 0)), - NEW_IDENTIFIER(CHILD(n, 2))); - else - return alias(NEW_IDENTIFIER(CHILD(n, 0)), - NULL); - break; + str = (NCH(n) == 3) ? 
NEW_IDENTIFIER(CHILD(n, 2)) : NULL; + return alias(NEW_IDENTIFIER(CHILD(n, 0)), str, c->c_arena); case dotted_as_name: if (NCH(n) == 1) { n = CHILD(n, 0); goto loop; } else { - alias_ty a = alias_for_import_name(CHILD(n, 0)); + alias_ty a = alias_for_import_name(c, CHILD(n, 0)); assert(!a->asname); a->asname = NEW_IDENTIFIER(CHILD(n, 2)); return a; @@ -2283,11 +2029,10 @@ break; case dotted_name: if (NCH(n) == 1) - return alias(NEW_IDENTIFIER(CHILD(n, 0)), NULL); + return alias(NEW_IDENTIFIER(CHILD(n, 0)), NULL, c->c_arena); else { /* Create a string of the form "a.b.c" */ int i, len; - PyObject *str; char *s; len = 0; @@ -2310,16 +2055,21 @@ --s; *s = '\0'; PyString_InternInPlace(&str); - return alias(str, NULL); + PyArena_AddPyObject(c->c_arena, str); + return alias(str, NULL, c->c_arena); } break; case STAR: - return alias(PyString_InternFromString("*"), NULL); + str = PyString_InternFromString("*"); + PyArena_AddPyObject(c->c_arena, str); + return alias(str, NULL, c->c_arena); default: PyErr_Format(PyExc_SystemError, "unexpected import name: %d", TYPE(n)); return NULL; } + + PyErr_SetString(PyExc_SystemError, "unhandled import name condition"); return NULL; } @@ -2341,25 +2091,22 @@ if (STR(CHILD(n, 0))[0] == 'i') { /* import */ n = CHILD(n, 1); REQ(n, dotted_as_names); - aliases = asdl_seq_new((NCH(n) + 1) / 2); + aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); if (!aliases) return NULL; for (i = 0; i < NCH(n); i += 2) { - alias_ty import_alias = alias_for_import_name(CHILD(n, i)); - if (!import_alias) { - asdl_alias_seq_free(aliases); + alias_ty import_alias = alias_for_import_name(c, CHILD(n, i)); + if (!import_alias) return NULL; - } asdl_seq_SET(aliases, i / 2, import_alias); } - return Import(aliases, LINENO(n)); + return Import(aliases, LINENO(n), c->c_arena); } else if (STR(CHILD(n, 0))[0] == 'f') { /* from */ - stmt_ty import; int n_children; const char *from_modules; int lineno = LINENO(n); - alias_ty mod = alias_for_import_name(CHILD(n, 1)); + 
alias_ty mod = alias_for_import_name(c, CHILD(n, 1)); if (!mod) return NULL; @@ -2370,7 +2117,6 @@ n = CHILD(n, 3); /* from ... import x, y, z */ if (NCH(n) % 2 == 0) { /* it ends with a comma, not valid but the parser allows it */ - free_alias(mod); ast_error(n, "trailing comma not allowed without" " surrounding parentheses"); return NULL; @@ -2383,7 +2129,6 @@ n = CHILD(n, 4); /* from ... import (x, y, z) */ else { /* XXX: don't we need to call ast_error(n, "..."); */ - free_alias(mod); return NULL; } @@ -2391,36 +2136,25 @@ if (from_modules && from_modules[0] == '*') n_children = 1; - aliases = asdl_seq_new((n_children + 1) / 2); - if (!aliases) { - free_alias(mod); + aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena); + if (!aliases) return NULL; - } /* handle "from ... import *" special b/c there's no children */ if (from_modules && from_modules[0] == '*') { - alias_ty import_alias = alias_for_import_name(n); - if (!import_alias) { - asdl_alias_seq_free(aliases); - free_alias(mod); + alias_ty import_alias = alias_for_import_name(c, n); + if (!import_alias) return NULL; - } asdl_seq_APPEND(aliases, import_alias); } for (i = 0; i < NCH(n); i += 2) { - alias_ty import_alias = alias_for_import_name(CHILD(n, i)); - if (!import_alias) { - asdl_alias_seq_free(aliases); - free_alias(mod); + alias_ty import_alias = alias_for_import_name(c, CHILD(n, i)); + if (!import_alias) return NULL; - } asdl_seq_APPEND(aliases, import_alias); } - Py_INCREF(mod->name); - import = ImportFrom(mod->name, aliases, lineno); - free_alias(mod); - return import; + return ImportFrom(mod->name, aliases, lineno, c->c_arena); } PyErr_Format(PyExc_SystemError, "unknown import statement: starts with command '%s'", @@ -2437,20 +2171,16 @@ int i; REQ(n, global_stmt); - s = asdl_seq_new(NCH(n) / 2); + s = asdl_seq_new(NCH(n) / 2, c->c_arena); if (!s) return NULL; for (i = 1; i < NCH(n); i += 2) { name = NEW_IDENTIFIER(CHILD(n, i)); - if (!name) { - for (i = i / 2; i > 0; i--) - 
Py_XDECREF((identifier) asdl_seq_GET(s, i)); - asdl_seq_free(s); /* ok */ + if (!name) return NULL; - } asdl_seq_SET(s, i / 2, name); } - return Global(s, LINENO(n)); + return Global(s, LINENO(n), c->c_arena); } static stmt_ty @@ -2481,7 +2211,7 @@ return NULL; } - return Exec(expr1, globals, locals, LINENO(n)); + return Exec(expr1, globals, locals, LINENO(n), c->c_arena); } static stmt_ty @@ -2493,7 +2223,7 @@ expr_ty expression = ast_for_expr(c, CHILD(n, 1)); if (!expression) return NULL; - return Assert(expression, NULL, LINENO(n)); + return Assert(expression, NULL, LINENO(n), c->c_arena); } else if (NCH(n) == 4) { expr_ty expr1, expr2; @@ -2505,7 +2235,7 @@ if (!expr2) return NULL; - return Assert(expr1, expr2, LINENO(n)); + return Assert(expr1, expr2, LINENO(n), c->c_arena); } PyErr_Format(PyExc_SystemError, "improper number of parts to 'assert' statement: %d", @@ -2517,7 +2247,7 @@ ast_for_suite(struct compiling *c, const node *n) { /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */ - asdl_seq *seq = NULL; + asdl_seq *seq; stmt_ty s; int i, total, num, end, pos = 0; node *ch; @@ -2525,7 +2255,7 @@ REQ(n, suite); total = num_stmts(n); - seq = asdl_seq_new(total); + seq = asdl_seq_new(total, c->c_arena); if (!seq) return NULL; if (TYPE(CHILD(n, 0)) == simple_stmt) { @@ -2541,7 +2271,7 @@ ch = CHILD(n, i); s = ast_for_stmt(c, ch); if (!s) - goto error; + return NULL; asdl_seq_SET(seq, pos++, s); } } @@ -2554,7 +2284,7 @@ /* small_stmt or compound_stmt with only one child */ s = ast_for_stmt(c, ch); if (!s) - goto error; + return NULL; asdl_seq_SET(seq, pos++, s); } else { @@ -2564,12 +2294,12 @@ for (j = 0; j < NCH(ch); j += 2) { /* statement terminates with a semi-colon ';' */ if (NCH(CHILD(ch, j)) == 0) { - assert((j + 1) == NCH(ch)); - break; + assert((j + 1) == NCH(ch)); + break; } s = ast_for_stmt(c, CHILD(ch, j)); if (!s) - goto error; + return NULL; asdl_seq_SET(seq, pos++, s); } } @@ -2577,10 +2307,6 @@ } assert(pos == seq->size); return seq; - 
error: - if (seq) - asdl_stmt_seq_free(seq); - return NULL; } static stmt_ty @@ -2601,13 +2327,12 @@ if (!expression) return NULL; suite_seq = ast_for_suite(c, CHILD(n, 3)); - if (!suite_seq) { - free_expr(expression); + if (!suite_seq) return NULL; - } - return If(expression, suite_seq, NULL, LINENO(n)); + return If(expression, suite_seq, NULL, LINENO(n), c->c_arena); } + s = STR(CHILD(n, 4)); /* s[2], the third character in the string, will be 's' for el_s_e, or @@ -2621,18 +2346,13 @@ if (!expression) return NULL; seq1 = ast_for_suite(c, CHILD(n, 3)); - if (!seq1) { - free_expr(expression); + if (!seq1) return NULL; - } seq2 = ast_for_suite(c, CHILD(n, 6)); - if (!seq2) { - asdl_stmt_seq_free(seq1); - free_expr(expression); + if (!seq2) return NULL; - } - return If(expression, seq1, seq2, LINENO(n)); + return If(expression, seq1, seq2, LINENO(n), c->c_arena); } else if (s[2] == 'i') { int i, n_elif, has_else = 0; @@ -2651,73 +2371,53 @@ expr_ty expression; asdl_seq *seq1, *seq2; - orelse = asdl_seq_new(1); + orelse = asdl_seq_new(1, c->c_arena); if (!orelse) return NULL; expression = ast_for_expr(c, CHILD(n, NCH(n) - 6)); - if (!expression) { - asdl_seq_free(orelse); /* ok */ + if (!expression) return NULL; - } seq1 = ast_for_suite(c, CHILD(n, NCH(n) - 4)); - if (!seq1) { - free_expr(expression); - asdl_seq_free(orelse); /* ok */ + if (!seq1) return NULL; - } seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1)); - if (!seq2) { - free_expr(expression); - asdl_stmt_seq_free(seq1); - asdl_seq_free(orelse); /* ok */ + if (!seq2) return NULL; - } asdl_seq_SET(orelse, 0, If(expression, seq1, seq2, - LINENO(CHILD(n, NCH(n) - 6)))); + LINENO(CHILD(n, NCH(n) - 6)), + c->c_arena)); /* the just-created orelse handled the last elif */ n_elif--; } - else - orelse = NULL; for (i = 0; i < n_elif; i++) { int off = 5 + (n_elif - i - 1) * 4; expr_ty expression; asdl_seq *suite_seq; - asdl_seq *new = asdl_seq_new(1); - if (!new) { - asdl_stmt_seq_free(orelse); + asdl_seq *new = 
asdl_seq_new(1, c->c_arena); + if (!new) return NULL; - } expression = ast_for_expr(c, CHILD(n, off)); - if (!expression) { - asdl_stmt_seq_free(orelse); - asdl_seq_free(new); /* ok */ + if (!expression) return NULL; - } suite_seq = ast_for_suite(c, CHILD(n, off + 2)); - if (!suite_seq) { - asdl_stmt_seq_free(orelse); - free_expr(expression); - asdl_seq_free(new); /* ok */ + if (!suite_seq) return NULL; - } asdl_seq_SET(new, 0, If(expression, suite_seq, orelse, - LINENO(CHILD(n, off)))); + LINENO(CHILD(n, off)), c->c_arena)); orelse = new; } return If(ast_for_expr(c, CHILD(n, 1)), ast_for_suite(c, CHILD(n, 3)), - orelse, LINENO(n)); - } - else { - PyErr_Format(PyExc_SystemError, - "unexpected token in 'if' statement: %s", s); - return NULL; + orelse, LINENO(n), c->c_arena); } + + PyErr_Format(PyExc_SystemError, + "unexpected token in 'if' statement: %s", s); + return NULL; } static stmt_ty @@ -2734,11 +2434,9 @@ if (!expression) return NULL; suite_seq = ast_for_suite(c, CHILD(n, 3)); - if (!suite_seq) { - free_expr(expression); + if (!suite_seq) return NULL; - } - return While(expression, suite_seq, NULL, LINENO(n)); + return While(expression, suite_seq, NULL, LINENO(n), c->c_arena); } else if (NCH(n) == 7) { expr_ty expression; @@ -2748,31 +2446,25 @@ if (!expression) return NULL; seq1 = ast_for_suite(c, CHILD(n, 3)); - if (!seq1) { - free_expr(expression); + if (!seq1) return NULL; - } seq2 = ast_for_suite(c, CHILD(n, 6)); - if (!seq2) { - asdl_stmt_seq_free(seq1); - free_expr(expression); + if (!seq2) return NULL; - } - return While(expression, seq1, seq2, LINENO(n)); - } - else { - PyErr_Format(PyExc_SystemError, - "wrong number of tokens for 'while' statement: %d", - NCH(n)); - return NULL; + return While(expression, seq1, seq2, LINENO(n), c->c_arena); } + + PyErr_Format(PyExc_SystemError, + "wrong number of tokens for 'while' statement: %d", + NCH(n)); + return NULL; } static stmt_ty ast_for_for_stmt(struct compiling *c, const node *n) { - asdl_seq *_target = 
NULL, *seq = NULL, *suite_seq = NULL; + asdl_seq *_target, *seq = NULL, *suite_seq; expr_ty expression; expr_ty target; /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */ @@ -2785,32 +2477,21 @@ } _target = ast_for_exprlist(c, CHILD(n, 1), Store); - if (!_target) { - asdl_stmt_seq_free(seq); + if (!_target) return NULL; - } - if (asdl_seq_LEN(_target) == 1) { + if (asdl_seq_LEN(_target) == 1) target = asdl_seq_GET(_target, 0); - asdl_seq_free(_target); /* ok */ - } else - target = Tuple(_target, Store, LINENO(n)); + target = Tuple(_target, Store, LINENO(n), c->c_arena); expression = ast_for_testlist(c, CHILD(n, 3)); - if (!expression) { - free_expr(target); - asdl_stmt_seq_free(seq); + if (!expression) return NULL; - } suite_seq = ast_for_suite(c, CHILD(n, 5)); - if (!suite_seq) { - free_expr(target); - free_expr(expression); - asdl_stmt_seq_free(seq); + if (!suite_seq) return NULL; - } - return For(target, expression, suite_seq, seq, LINENO(n)); + return For(target, expression, suite_seq, seq, LINENO(n), c->c_arena); } static excepthandler_ty @@ -2825,7 +2506,7 @@ if (!suite_seq) return NULL; - return excepthandler(NULL, NULL, suite_seq); + return excepthandler(NULL, NULL, suite_seq, c->c_arena); } else if (NCH(exc) == 2) { expr_ty expression; @@ -2835,12 +2516,10 @@ if (!expression) return NULL; suite_seq = ast_for_suite(c, body); - if (!suite_seq) { - free_expr(expression); + if (!suite_seq) return NULL; - } - return excepthandler(expression, NULL, suite_seq); + return excepthandler(expression, NULL, suite_seq, c->c_arena); } else if (NCH(exc) == 4) { asdl_seq *suite_seq; @@ -2848,105 +2527,99 @@ expr_ty e = ast_for_expr(c, CHILD(exc, 3)); if (!e) return NULL; - if (!set_context(e, Store, CHILD(exc, 3))) { - free_expr(e); + if (!set_context(e, Store, CHILD(exc, 3))) return NULL; - } expression = ast_for_expr(c, CHILD(exc, 1)); - if (!expression) { - free_expr(e); + if (!expression) return NULL; - } suite_seq = ast_for_suite(c, body); - if 
(!suite_seq) { - free_expr(expression); - free_expr(e); + if (!suite_seq) return NULL; - } - return excepthandler(expression, e, suite_seq); - } - else { - PyErr_Format(PyExc_SystemError, - "wrong number of children for 'except' clause: %d", - NCH(exc)); - return NULL; + return excepthandler(expression, e, suite_seq, c->c_arena); } + + PyErr_Format(PyExc_SystemError, + "wrong number of children for 'except' clause: %d", + NCH(exc)); + return NULL; } static stmt_ty ast_for_try_stmt(struct compiling *c, const node *n) { + const int nch = NCH(n); + int n_except = (nch - 3)/3; + asdl_seq *body, *orelse = NULL, *finally = NULL; + REQ(n, try_stmt); - if (TYPE(CHILD(n, 3)) == NAME) {/* must be 'finally' */ - /* try_stmt: 'try' ':' suite 'finally' ':' suite) */ - asdl_seq *s1, *s2; - s1 = ast_for_suite(c, CHILD(n, 2)); - if (!s1) - return NULL; - s2 = ast_for_suite(c, CHILD(n, 5)); - if (!s2) { - asdl_stmt_seq_free(s1); - return NULL; - } - - return TryFinally(s1, s2, LINENO(n)); - } - else if (TYPE(CHILD(n, 3)) == except_clause) { - /* try_stmt: ('try' ':' suite (except_clause ':' suite)+ - ['else' ':' suite] - */ - asdl_seq *suite_seq1, *suite_seq2; - asdl_seq *handlers; - int i, has_else = 0, n_except = NCH(n) - 3; - if (TYPE(CHILD(n, NCH(n) - 3)) == NAME) { - has_else = 1; - n_except -= 3; - } - n_except /= 3; - handlers = asdl_seq_new(n_except); - if (!handlers) - return NULL; - for (i = 0; i < n_except; i++) { - excepthandler_ty e = ast_for_except_clause(c, - CHILD(n, 3 + i * 3), - CHILD(n, 5 + i * 3)); - if (!e) { - for ( ; i >= 0; i--) - free_excepthandler(asdl_seq_GET(handlers, i)); - asdl_seq_free(handlers); /* ok */ + body = ast_for_suite(c, CHILD(n, 2)); + if (body == NULL) + return NULL; + + if (TYPE(CHILD(n, nch - 3)) == NAME) { + if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) { + if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) { + /* we can assume it's an "else", + because nch >= 9 for try-else-finally and + it would otherwise have a type of 
except_clause */ + orelse = ast_for_suite(c, CHILD(n, nch - 4)); + if (orelse == NULL) + return NULL; + n_except--; + } + + finally = ast_for_suite(c, CHILD(n, nch - 1)); + if (finally == NULL) return NULL; - } - asdl_seq_SET(handlers, i, e); + n_except--; } - - suite_seq1 = ast_for_suite(c, CHILD(n, 2)); - if (!suite_seq1) { - for (i = 0; i < asdl_seq_LEN(handlers); i++) - free_excepthandler(asdl_seq_GET(handlers, i)); - asdl_seq_free(handlers); /* ok */ - return NULL; - } - if (has_else) { - suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1)); - if (!suite_seq2) { - for (i = 0; i < asdl_seq_LEN(handlers); i++) - free_excepthandler(asdl_seq_GET(handlers, i)); - asdl_seq_free(handlers); /* ok */ - asdl_stmt_seq_free(suite_seq1); + else { + /* we can assume it's an "else", + otherwise it would have a type of except_clause */ + orelse = ast_for_suite(c, CHILD(n, nch - 1)); + if (orelse == NULL) return NULL; - } + n_except--; } - else - suite_seq2 = NULL; - - return TryExcept(suite_seq1, handlers, suite_seq2, LINENO(n)); } - else { + else if (TYPE(CHILD(n, nch - 3)) != except_clause) { ast_error(n, "malformed 'try' statement"); return NULL; } + + if (n_except > 0) { + int i; + stmt_ty except_st; + /* process except statements to create a try ... except */ + asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena); + if (handlers == NULL) + return NULL; + + for (i = 0; i < n_except; i++) { + excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3), + CHILD(n, 5 + i * 3)); + if (!e) + return NULL; + asdl_seq_SET(handlers, i, e); + } + + except_st = TryExcept(body, handlers, orelse, LINENO(n), c->c_arena); + if (!finally) + return except_st; + + /* if a 'finally' is present too, we nest the TryExcept within a + TryFinally to emulate try ... except ... finally */ + body = asdl_seq_new(1, c->c_arena); + if (body == NULL) + return NULL; + asdl_seq_SET(body, 0, except_st); + } + + /* must be a try ... 
finally (except clauses are in body, if any exist) */ + assert(finally != NULL); + return TryFinally(body, finally, LINENO(n), c->c_arena); } static stmt_ty @@ -2966,14 +2639,16 @@ s = ast_for_suite(c, CHILD(n, 3)); if (!s) return NULL; - return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), NULL, s, LINENO(n)); + return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), NULL, s, LINENO(n), + c->c_arena); } /* check for empty base list */ if (TYPE(CHILD(n,3)) == RPAR) { s = ast_for_suite(c, CHILD(n,5)); if (!s) return NULL; - return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), NULL, s, LINENO(n)); + return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), NULL, s, LINENO(n), + c->c_arena); } /* else handle the base class list */ @@ -2982,11 +2657,10 @@ return NULL; s = ast_for_suite(c, CHILD(n, 6)); - if (!s) { - asdl_expr_seq_free(bases); + if (!s) return NULL; - } - return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), bases, s, LINENO(n)); + return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), bases, s, LINENO(n), + c->c_arena); } static stmt_ty @@ -3015,7 +2689,7 @@ case del_stmt: return ast_for_del_stmt(c, n); case pass_stmt: - return Pass(LINENO(n)); + return Pass(LINENO(n), c->c_arena); case flow_stmt: return ast_for_flow_stmt(c, n); case import_stmt: @@ -3099,7 +2773,7 @@ if (imflag) { c.real = 0.; PyFPE_START_PROTECT("atof", return 0) - c.imag = atof(s); + c.imag = PyOS_ascii_atof(s); PyFPE_END_PROTECT(c) return PyComplex_FromCComplex(c); } @@ -3107,7 +2781,7 @@ #endif { PyFPE_START_PROTECT("atof", return 0) - dx = atof(s); + dx = PyOS_ascii_atof(s); PyFPE_END_PROTECT(dx) return PyFloat_FromDouble(dx); } @@ -3204,9 +2878,8 @@ static PyObject * parsestr(const char *s, const char *encoding) { - PyObject *v; size_t len; - int quote = *s; + int quote = Py_CHARMASK(*s); int rawmode = 0; int need_encoding; int unicode = 0; @@ -3259,7 +2932,7 @@ encoding. 
*/ Py_FatalError("cannot deal with encodings in this build."); #else - PyObject* u = PyUnicode_DecodeUTF8(s, len, NULL); + PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL); if (u == NULL) return NULL; v = PyUnicode_AsEncodedString(u, encoding, NULL); @@ -3271,9 +2944,8 @@ } } - v = PyString_DecodeEscape(s, len, NULL, unicode, - need_encoding ? encoding : NULL); - return v; + return PyString_DecodeEscape(s, len, NULL, unicode, + need_encoding ? encoding : NULL); } /* Build a Python string object out of a STRING atom. This takes care of @@ -3300,13 +2972,12 @@ } #ifdef Py_USING_UNICODE else { - PyObject *temp; - temp = PyUnicode_Concat(v, s); + PyObject *temp = PyUnicode_Concat(v, s); Py_DECREF(s); - if (temp == NULL) - goto onError; Py_DECREF(v); v = temp; + if (v == NULL) + goto onError; } #endif } Modified: python/branches/ssize_t/Python/compile.c ============================================================================== --- python/branches/ssize_t/Python/compile.c (original) +++ python/branches/ssize_t/Python/compile.c Mon Jan 2 16:17:17 2006 @@ -23,6 +23,7 @@ #include "Python-ast.h" #include "node.h" +#include "pyarena.h" #include "ast.h" #include "code.h" #include "compile.h" @@ -148,6 +149,7 @@ struct compiler_unit *u; /* compiler state for current block */ PyObject *c_stack; /* Python list holding compiler_unit ptrs */ char *c_encoding; /* source encoding (a borrowed reference) */ + PyArena *c_arena; /* pointer to memory allocation arena */ }; struct assembler { @@ -169,7 +171,6 @@ static int compiler_addop_o(struct compiler *, int, PyObject *, PyObject *); static int compiler_addop_i(struct compiler *, int, int); static int compiler_addop_j(struct compiler *, int, basicblock *, int); -static void compiler_use_block(struct compiler *, basicblock *); static basicblock *compiler_use_new_block(struct compiler *); static int compiler_error(struct compiler *, const char *); static int compiler_nameop(struct compiler *, identifier, expr_context_ty); @@ 
-243,7 +244,8 @@ } PyCodeObject * -PyAST_Compile(mod_ty mod, const char *filename, PyCompilerFlags *flags) +PyAST_Compile(mod_ty mod, const char *filename, PyCompilerFlags *flags, + PyArena *arena) { struct compiler c; PyCodeObject *co = NULL; @@ -259,6 +261,7 @@ if (!compiler_init(&c)) goto error; c.c_filename = filename; + c.c_arena = arena; c.c_future = PyFuture_FromAST(mod, filename); if (c.c_future == NULL) goto error; @@ -292,12 +295,12 @@ PyCodeObject * PyNode_Compile(struct _node *n, const char *filename) { - PyCodeObject *co; - mod_ty mod = PyAST_FromNode(n, NULL, filename); - if (!mod) - return NULL; - co = PyAST_Compile(mod, filename, NULL); - free_mod(mod); + PyCodeObject *co = NULL; + PyArena *arena = PyArena_New(); + mod_ty mod = PyAST_FromNode(n, NULL, filename, arena); + if (mod) + co = PyAST_Compile(mod, filename, NULL, arena); + PyArena_Free(arena); return co; } @@ -1070,12 +1073,16 @@ struct compiler_unit *u; u = PyObject_Malloc(sizeof(struct compiler_unit)); + if (!u) { + PyErr_NoMemory(); + return 0; + } memset(u, 0, sizeof(struct compiler_unit)); u->u_argcount = 0; u->u_ste = PySymtable_Lookup(c->c_st, key); if (!u->u_ste) { compiler_unit_free(u); - return 0; + return 0; } Py_INCREF(name); u->u_name = name; @@ -1158,8 +1165,10 @@ u = c->u; b = (basicblock *)PyObject_Malloc(sizeof(basicblock)); - if (b == NULL) + if (b == NULL) { + PyErr_NoMemory(); return NULL; + } memset((void *)b, 0, sizeof(basicblock)); assert (b->b_next == NULL); b->b_list = u->u_blocks; @@ -1167,13 +1176,6 @@ return b; } -static void -compiler_use_block(struct compiler *c, basicblock *block) -{ - assert (block != NULL); - c->u->u_curblock = block; -} - static basicblock * compiler_use_new_block(struct compiler *c) { @@ -2518,7 +2520,7 @@ else { ADDOP_I(c, RAISE_VARARGS, 1); } - compiler_use_block(c, end); + compiler_use_next_block(c, end); ADDOP(c, POP_TOP); return 1; } @@ -2749,8 +2751,7 @@ return INPLACE_FLOOR_DIVIDE; } PyErr_Format(PyExc_SystemError, - "inplace binary 
op %d should not be possible", - op); + "inplace binary op %d should not be possible", op); return 0; } @@ -2798,6 +2799,9 @@ case GLOBAL_EXPLICIT: optype = OP_GLOBAL; break; + default: + /* scope can be 0 */ + break; } /* XXX Leave assert here, but handle __doc__ and the like better */ @@ -2819,6 +2823,7 @@ Py_DECREF(mangled); return 0; case Param: + default: PyErr_SetString(PyExc_SystemError, "param invalid for deref variable"); return 0; @@ -2833,6 +2838,7 @@ case AugStore: break; case Param: + default: PyErr_SetString(PyExc_SystemError, "param invalid for local variable"); return 0; @@ -2849,6 +2855,7 @@ case AugStore: break; case Param: + default: PyErr_SetString(PyExc_SystemError, "param invalid for global variable"); return 0; @@ -2863,6 +2870,7 @@ case AugStore: break; case Param: + default: PyErr_SetString(PyExc_SystemError, "param invalid for name variable"); return 0; @@ -3350,6 +3358,7 @@ ADDOP_NAME(c, DELETE_ATTR, e->v.Attribute.attr, names); break; case Param: + default: PyErr_SetString(PyExc_SystemError, "param invalid in attribute expression"); return 0; @@ -3377,6 +3386,7 @@ VISIT_SLICE(c, e->v.Subscript.slice, Del); break; case Param: + default: PyErr_SetString(PyExc_SystemError, "param invalid in subscript expression"); return 0; @@ -3404,7 +3414,7 @@ switch (e->kind) { case Attribute_kind: auge = Attribute(e->v.Attribute.value, e->v.Attribute.attr, - AugLoad, e->lineno); + AugLoad, e->lineno, c->c_arena); if (auge == NULL) return 0; VISIT(c, expr, auge); @@ -3412,11 +3422,10 @@ ADDOP(c, inplace_binop(c, s->v.AugAssign.op)); auge->v.Attribute.ctx = AugStore; VISIT(c, expr, auge); - free(auge); break; case Subscript_kind: auge = Subscript(e->v.Subscript.value, e->v.Subscript.slice, - AugLoad, e->lineno); + AugLoad, e->lineno, c->c_arena); if (auge == NULL) return 0; VISIT(c, expr, auge); @@ -3424,7 +3433,6 @@ ADDOP(c, inplace_binop(c, s->v.AugAssign.op)); auge->v.Subscript.ctx = AugStore; VISIT(c, expr, auge); - free(auge); break; case Name_kind: 
VISIT(c, expr, s->v.AugAssign.target); @@ -3432,8 +3440,9 @@ ADDOP(c, inplace_binop(c, s->v.AugAssign.op)); return compiler_nameop(c, e->v.Name.id, Store); default: - fprintf(stderr, - "invalid node type for augmented assignment\n"); + PyErr_Format(PyExc_SystemError, + "invalid node type (%d) for augmented assignment", + e->kind); return 0; } return 1; @@ -3505,9 +3514,9 @@ case Store: op = STORE_SUBSCR; break; case Del: op = DELETE_SUBSCR; break; case Param: - fprintf(stderr, - "invalid %s kind %d in subscript\n", - kind, ctx); + PyErr_Format(PyExc_SystemError, + "invalid %s kind %d in subscript\n", + kind, ctx); return 0; } if (ctx == AugLoad) { @@ -3590,6 +3599,7 @@ case Store: op = STORE_SLICE; break; case Del: op = DELETE_SLICE; break; case Param: + default: PyErr_SetString(PyExc_SystemError, "param invalid in simple slice"); return 0; @@ -3609,11 +3619,11 @@ break; case Slice_kind: return compiler_slice(c, s, ctx); - break; case Index_kind: VISIT(c, expr, s->v.Index.value); break; case ExtSlice_kind: + default: PyErr_SetString(PyExc_SystemError, "extended slice invalid in nested slice"); return 0; @@ -3655,6 +3665,10 @@ if (ctx != AugStore) VISIT(c, expr, s->v.Index.value); return compiler_handle_subscr(c, "index", ctx); + default: + PyErr_Format(PyExc_SystemError, + "invalid slice %d", s->kind); + return 0; } return 1; } @@ -3744,8 +3758,10 @@ return 0; a->a_postorder = (basicblock **)PyObject_Malloc( sizeof(basicblock *) * nblocks); - if (!a->a_postorder) + if (!a->a_postorder) { + PyErr_NoMemory(); return 0; + } return 1; } Modified: python/branches/ssize_t/Python/dynload_aix.c ============================================================================== --- python/branches/ssize_t/Python/dynload_aix.c (original) +++ python/branches/ssize_t/Python/dynload_aix.c Mon Jan 2 16:17:17 2006 @@ -144,7 +144,7 @@ if (nerr == load_errtab[j].errNo && load_errtab[j].errstr) ERRBUF_APPEND(load_errtab[j].errstr); } - while (isdigit(*message[i])) message[i]++ ; + while 
(isdigit(Py_CHARMASK(*message[i]))) message[i]++ ; ERRBUF_APPEND(message[i]); ERRBUF_APPEND("\n"); } Modified: python/branches/ssize_t/Python/getargs.c ============================================================================== --- python/branches/ssize_t/Python/getargs.c (original) +++ python/branches/ssize_t/Python/getargs.c Mon Jan 2 16:17:17 2006 @@ -213,7 +213,7 @@ if (level == 0) { if (c == 'O') max++; - else if (isalpha(c)) { + else if (isalpha(Py_CHARMASK(c))) { if (c != 'e') /* skip encoded */ max++; } else if (c == '|') @@ -303,7 +303,7 @@ } } - if (*format != '\0' && !isalpha((int)(*format)) && + if (*format != '\0' && !isalpha(Py_CHARMASK(*format)) && *format != '(' && *format != '|' && *format != ':' && *format != ';') { PyErr_Format(PyExc_SystemError, @@ -396,7 +396,7 @@ } else if (c == ':' || c == ';' || c == '\0') break; - else if (level == 0 && isalpha(c)) + else if (level == 0 && isalpha(Py_CHARMASK(c))) n++; } @@ -1355,7 +1355,7 @@ min = -1; max = 0; while ((i = *format++) != '\0') { - if (isalpha(i) && i != 'e') { + if (isalpha(Py_CHARMASK(i)) && i != 'e') { max++; if (*p == NULL) { PyErr_SetString(PyExc_RuntimeError, Modified: python/branches/ssize_t/Python/graminit.c ============================================================================== --- python/branches/ssize_t/Python/graminit.c (original) +++ python/branches/ssize_t/Python/graminit.c Mon Jan 2 16:17:17 2006 @@ -512,7 +512,8 @@ static arc arcs_26_0[1] = { {74, 1}, }; -static arc arcs_26_1[1] = { +static arc arcs_26_1[2] = { + {75, 1}, {12, 2}, }; static arc arcs_26_2[1] = { @@ -521,20 +522,20 @@ static arc arcs_26_3[3] = { {28, 4}, {13, 5}, - {75, 4}, + {76, 4}, }; static arc arcs_26_4[1] = { {0, 4}, }; static arc arcs_26_5[1] = { - {75, 6}, + {76, 6}, }; static arc arcs_26_6[1] = { {15, 4}, }; static state states_26[7] = { {1, arcs_26_0}, - {1, arcs_26_1}, + {2, arcs_26_1}, {1, arcs_26_2}, {3, arcs_26_3}, {1, arcs_26_4}, @@ -580,14 +581,14 @@ {1, arcs_28_3}, }; static arc 
arcs_29_0[1] = { - {76, 1}, + {77, 1}, }; static arc arcs_29_1[2] = { {27, 2}, {0, 1}, }; static arc arcs_29_2[2] = { - {76, 1}, + {77, 1}, {0, 2}, }; static state states_29[3] = { @@ -596,7 +597,7 @@ {2, arcs_29_2}, }; static arc arcs_30_0[1] = { - {77, 1}, + {78, 1}, }; static arc arcs_30_1[2] = { {27, 0}, @@ -610,7 +611,7 @@ {19, 1}, }; static arc arcs_31_1[2] = { - {78, 0}, + {75, 0}, {0, 1}, }; static state states_31[2] = { @@ -841,15 +842,26 @@ static arc arcs_39_7[1] = { {22, 9}, }; -static arc arcs_39_8[3] = { +static arc arcs_39_8[4] = { {95, 4}, - {91, 5}, + {91, 10}, + {96, 5}, {0, 8}, }; static arc arcs_39_9[1] = { {0, 9}, }; -static state states_39[10] = { +static arc arcs_39_10[1] = { + {21, 11}, +}; +static arc arcs_39_11[1] = { + {22, 12}, +}; +static arc arcs_39_12[2] = { + {96, 5}, + {0, 12}, +}; +static state states_39[13] = { {1, arcs_39_0}, {1, arcs_39_1}, {1, arcs_39_2}, @@ -858,8 +870,11 @@ {1, arcs_39_5}, {1, arcs_39_6}, {1, arcs_39_7}, - {3, arcs_39_8}, + {4, arcs_39_8}, {1, arcs_39_9}, + {1, arcs_39_10}, + {1, arcs_39_11}, + {2, arcs_39_12}, }; static arc arcs_40_0[1] = { {97, 1}, @@ -1237,7 +1252,7 @@ static arc arcs_59_0[3] = { {13, 1}, {136, 2}, - {78, 3}, + {75, 3}, }; static arc arcs_59_1[2] = { {14, 4}, @@ -1284,12 +1299,12 @@ {2, arcs_60_2}, }; static arc arcs_61_0[3] = { - {78, 1}, + {75, 1}, {26, 2}, {21, 3}, }; static arc arcs_61_1[1] = { - {78, 4}, + {75, 4}, }; static arc arcs_61_2[2] = { {21, 3}, @@ -1301,7 +1316,7 @@ {0, 3}, }; static arc arcs_61_4[1] = { - {78, 6}, + {75, 6}, }; static arc arcs_61_5[2] = { {151, 6}, @@ -1754,7 +1769,7 @@ "\000\000\000\000\000\000\000\000\000\000\000\020\000\000\000\000\000\000\000\000\000"}, {294, "for_stmt", 0, 10, states_38, "\000\000\000\000\000\000\000\000\000\000\000\040\000\000\000\000\000\000\000\000\000"}, - {295, "try_stmt", 0, 10, states_39, + {295, "try_stmt", 0, 13, states_39, "\000\000\000\000\000\000\000\000\000\000\000\100\000\000\000\000\000\000\000\000\000"}, {296, 
"except_clause", 0, 5, states_40, "\000\000\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000"}, @@ -1795,11 +1810,11 @@ {314, "lambdef", 0, 5, states_58, "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\020\000\000"}, {315, "trailer", 0, 7, states_59, - "\000\040\000\000\000\000\000\000\000\100\000\000\000\000\000\000\000\001\000\000\000"}, + "\000\040\000\000\000\000\000\000\000\010\000\000\000\000\000\000\000\001\000\000\000"}, {316, "subscriptlist", 0, 3, states_60, - "\000\040\050\000\000\000\000\000\000\100\000\000\000\002\000\140\010\111\023\000\000"}, + "\000\040\050\000\000\000\000\000\000\010\000\000\000\002\000\140\010\111\023\000\000"}, {317, "subscript", 0, 7, states_61, - "\000\040\050\000\000\000\000\000\000\100\000\000\000\002\000\140\010\111\023\000\000"}, + "\000\040\050\000\000\000\000\000\000\010\000\000\000\002\000\140\010\111\023\000\000"}, {318, "sliceop", 0, 3, states_62, "\000\000\040\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {319, "exprlist", 0, 3, states_63, @@ -1911,10 +1926,10 @@ {1, "import"}, {286, 0}, {1, "from"}, + {23, 0}, {285, 0}, {283, 0}, {284, 0}, - {23, 0}, {1, "global"}, {1, "exec"}, {303, 0}, Modified: python/branches/ssize_t/Python/import.c ============================================================================== --- python/branches/ssize_t/Python/import.c (original) +++ python/branches/ssize_t/Python/import.c Mon Jan 2 16:17:17 2006 @@ -4,6 +4,7 @@ #include "Python.h" #include "Python-ast.h" +#include "pyarena.h" #include "pythonrun.h" #include "errcode.h" #include "marshal.h" @@ -773,13 +774,14 @@ { PyCodeObject *co = NULL; mod_ty mod; + PyArena *arena = PyArena_New(); mod = PyParser_ASTFromFile(fp, pathname, Py_file_input, 0, 0, 0, - NULL); + NULL, arena); if (mod) { - co = PyAST_Compile(mod, pathname, NULL); - free_mod(mod); + co = PyAST_Compile(mod, pathname, NULL, arena); } + PyArena_Free(arena); return co; } Modified: 
python/branches/ssize_t/Python/modsupport.c ============================================================================== --- python/branches/ssize_t/Python/modsupport.c (original) +++ python/branches/ssize_t/Python/modsupport.c Mon Jan 2 16:17:17 2006 @@ -307,7 +307,7 @@ { unsigned int n; n = va_arg(*p_va, unsigned int); - if (n > PyInt_GetMax()) + if (n > (unsigned long)PyInt_GetMax()) return PyLong_FromUnsignedLong((unsigned long)n); else return PyInt_FromLong(n); @@ -325,7 +325,7 @@ { unsigned long n; n = va_arg(*p_va, unsigned long); - if (n > PyInt_GetMax()) + if (n > (unsigned long)PyInt_GetMax()) return PyLong_FromUnsignedLong(n); else return PyInt_FromLong(n); Modified: python/branches/ssize_t/Python/pystrtod.c ============================================================================== --- python/branches/ssize_t/Python/pystrtod.c (original) +++ python/branches/ssize_t/Python/pystrtod.c Mon Jan 2 16:17:17 2006 @@ -38,18 +38,16 @@ * Return value: the #gdouble value. **/ double -PyOS_ascii_strtod(const char *nptr, - char **endptr) +PyOS_ascii_strtod(const char *nptr, char **endptr) { char *fail_pos; - double val; + double val = -1.0; struct lconv *locale_data; const char *decimal_point; int decimal_point_len; const char *p, *decimal_point_pos; const char *end = NULL; /* Silence gcc */ -/* g_return_val_if_fail (nptr != NULL, 0); */ assert(nptr != NULL); fail_pos = NULL; @@ -73,64 +71,36 @@ if (*p == '+' || *p == '-') p++; - if (p[0] == '0' && - (p[1] == 'x' || p[1] == 'X')) + while (ISDIGIT(*p)) + p++; + + if (*p == '.') { - p += 2; - /* HEX - find the (optional) decimal point */ + decimal_point_pos = p++; - while (ISXDIGIT(*p)) + while (ISDIGIT(*p)) p++; - if (*p == '.') - { - decimal_point_pos = p++; - - while (ISXDIGIT(*p)) - p++; - - if (*p == 'p' || *p == 'P') - p++; - if (*p == '+' || *p == '-') - p++; - while (ISDIGIT(*p)) - p++; - end = p; - } - } - else - { + if (*p == 'e' || *p == 'E') + p++; + if (*p == '+' || *p == '-') + p++; while 
(ISDIGIT(*p)) p++; - - if (*p == '.') - { - decimal_point_pos = p++; - - while (ISDIGIT(*p)) - p++; - - if (*p == 'e' || *p == 'E') - p++; - if (*p == '+' || *p == '-') - p++; - while (ISDIGIT(*p)) - p++; - end = p; - } + end = p; } - /* For the other cases, we need not convert the decimal point */ + /* For the other cases, we need not convert the decimal point */ } - /* Set errno to zero, so that we can distinguish zero results - and underflows */ + /* Set errno to zero, so that we can distinguish zero results + and underflows */ errno = 0; if (decimal_point_pos) { char *copy, *c; - /* We need to convert the '.' to the locale specific decimal point */ + /* We need to convert the '.' to the locale specific decimal point */ copy = malloc(end - nptr + 1 + decimal_point_len); c = copy; @@ -155,8 +125,15 @@ free(copy); } - else - val = strtod(nptr, &fail_pos); + else { + unsigned i = 0; + if (nptr[i] == '-') + i++; + if (nptr[i] == '0' && (nptr[i+1] == 'x' || nptr[i+1] == 'X')) + fail_pos = (char*)nptr; + else + val = strtod(nptr, &fail_pos); + } if (endptr) *endptr = fail_pos; Modified: python/branches/ssize_t/Python/pythonrun.c ============================================================================== --- python/branches/ssize_t/Python/pythonrun.c (original) +++ python/branches/ssize_t/Python/pythonrun.c Mon Jan 2 16:17:17 2006 @@ -12,6 +12,7 @@ #include "code.h" #include "compile.h" #include "symtable.h" +#include "pyarena.h" #include "ast.h" #include "eval.h" #include "marshal.h" @@ -36,9 +37,9 @@ static void initmain(void); static void initsite(void); static PyObject *run_err_mod(mod_ty, const char *, PyObject *, PyObject *, - PyCompilerFlags *); + PyCompilerFlags *, PyArena *arena); static PyObject *run_mod(mod_ty, const char *, PyObject *, PyObject *, - PyCompilerFlags *); + PyCompilerFlags *, PyArena *); static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *, PyCompilerFlags *); static void err_input(perrdetail *); @@ -697,6 +698,7 @@ { 
PyObject *m, *d, *v, *w; mod_ty mod; + PyArena *arena; char *ps1 = "", *ps2 = ""; int errcode = 0; @@ -716,12 +718,14 @@ else if (PyString_Check(w)) ps2 = PyString_AsString(w); } + arena = PyArena_New(); mod = PyParser_ASTFromFile(fp, filename, Py_single_input, ps1, ps2, - flags, &errcode); + flags, &errcode, arena); Py_XDECREF(v); Py_XDECREF(w); if (mod == NULL) { + PyArena_Free(arena); if (errcode == E_EOF) { PyErr_Clear(); return E_EOF; @@ -730,11 +734,13 @@ return -1; } m = PyImport_AddModule("__main__"); - if (m == NULL) + if (m == NULL) { + PyArena_Free(arena); return -1; + } d = PyModule_GetDict(m); - v = run_mod(mod, filename, d, d, flags); - free_mod(mod); + v = run_mod(mod, filename, d, d, flags, arena); + PyArena_Free(arena); if (v == NULL) { PyErr_Print(); return -1; @@ -1155,9 +1161,11 @@ PyObject *locals, PyCompilerFlags *flags) { PyObject *ret; - mod_ty mod = PyParser_ASTFromString(str, "", start, flags); - ret = run_err_mod(mod, "", globals, locals, flags); - free_mod(mod); + PyArena *arena = PyArena_New(); + mod_ty mod = PyParser_ASTFromString(str, "", start, flags, + arena); + ret = run_err_mod(mod, "", globals, locals, flags, arena); + PyArena_Free(arena); return ret; } @@ -1166,33 +1174,36 @@ PyObject *locals, int closeit, PyCompilerFlags *flags) { PyObject *ret; + PyArena *arena = PyArena_New(); mod_ty mod = PyParser_ASTFromFile(fp, filename, start, 0, 0, - flags, NULL); - if (mod == NULL) + flags, NULL, arena); + if (mod == NULL) { + PyArena_Free(arena); return NULL; + } if (closeit) fclose(fp); - ret = run_err_mod(mod, filename, globals, locals, flags); - free_mod(mod); + ret = run_err_mod(mod, filename, globals, locals, flags, arena); + PyArena_Free(arena); return ret; } static PyObject * run_err_mod(mod_ty mod, const char *filename, PyObject *globals, - PyObject *locals, PyCompilerFlags *flags) + PyObject *locals, PyCompilerFlags *flags, PyArena *arena) { if (mod == NULL) return NULL; - return run_mod(mod, filename, globals, locals, flags); 
+ return run_mod(mod, filename, globals, locals, flags, arena); } static PyObject * run_mod(mod_ty mod, const char *filename, PyObject *globals, PyObject *locals, - PyCompilerFlags *flags) + PyCompilerFlags *flags, PyArena *arena) { PyCodeObject *co; PyObject *v; - co = PyAST_Compile(mod, filename, flags); + co = PyAST_Compile(mod, filename, flags, arena); if (co == NULL) return NULL; v = PyEval_EvalCode(co, globals, locals); @@ -1236,43 +1247,45 @@ Py_CompileStringFlags(const char *str, const char *filename, int start, PyCompilerFlags *flags) { - mod_ty mod; PyCodeObject *co; - mod = PyParser_ASTFromString(str, filename, start, flags); - if (mod == NULL) + PyArena *arena = PyArena_New(); + mod_ty mod = PyParser_ASTFromString(str, filename, start, flags, arena); + if (mod == NULL) { + PyArena_Free(arena); return NULL; - co = PyAST_Compile(mod, filename, flags); - free_mod(mod); + } + co = PyAST_Compile(mod, filename, flags, arena); + PyArena_Free(arena); return (PyObject *)co; } struct symtable * Py_SymtableString(const char *str, const char *filename, int start) { - mod_ty mod; struct symtable *st; - - mod = PyParser_ASTFromString(str, filename, start, NULL); - if (mod == NULL) + PyArena *arena = PyArena_New(); + mod_ty mod = PyParser_ASTFromString(str, filename, start, NULL, arena); + if (mod == NULL) { + PyArena_Free(arena); return NULL; + } st = PySymtable_Build(mod, filename, 0); - free_mod(mod); + PyArena_Free(arena); return st; } /* Preferred access to parser is through AST. 
*/ mod_ty PyParser_ASTFromString(const char *s, const char *filename, int start, - PyCompilerFlags *flags) + PyCompilerFlags *flags, PyArena *arena) { - node *n; mod_ty mod; perrdetail err; - n = PyParser_ParseStringFlagsFilename(s, filename, &_PyParser_Grammar, - start, &err, - PARSER_FLAGS(flags)); + node *n = PyParser_ParseStringFlagsFilename(s, filename, + &_PyParser_Grammar, start, &err, + PARSER_FLAGS(flags)); if (n) { - mod = PyAST_FromNode(n, flags, filename); + mod = PyAST_FromNode(n, flags, filename, arena); PyNode_Free(n); return mod; } @@ -1284,15 +1297,15 @@ mod_ty PyParser_ASTFromFile(FILE *fp, const char *filename, int start, char *ps1, - char *ps2, PyCompilerFlags *flags, int *errcode) + char *ps2, PyCompilerFlags *flags, int *errcode, + PyArena *arena) { - node *n; mod_ty mod; perrdetail err; - n = PyParser_ParseFileFlags(fp, filename, &_PyParser_Grammar, start, - ps1, ps2, &err, PARSER_FLAGS(flags)); + node *n = PyParser_ParseFileFlags(fp, filename, &_PyParser_Grammar, + start, ps1, ps2, &err, PARSER_FLAGS(flags)); if (n) { - mod = PyAST_FromNode(n, flags, filename); + mod = PyAST_FromNode(n, flags, filename, arena); PyNode_Free(n); return mod; } @@ -1309,10 +1322,9 @@ node * PyParser_SimpleParseFileFlags(FILE *fp, const char *filename, int start, int flags) { - node *n; perrdetail err; - n = PyParser_ParseFileFlags(fp, filename, &_PyParser_Grammar, start, - (char *)0, (char *)0, &err, flags); + node *n = PyParser_ParseFileFlags(fp, filename, &_PyParser_Grammar, + start, NULL, NULL, &err, flags); if (n == NULL) err_input(&err); @@ -1324,10 +1336,9 @@ node * PyParser_SimpleParseStringFlags(const char *str, int start, int flags) { - node *n; perrdetail err; - n = PyParser_ParseStringFlags(str, &_PyParser_Grammar, start, &err, - flags); + node *n = PyParser_ParseStringFlags(str, &_PyParser_Grammar, + start, &err, flags); if (n == NULL) err_input(&err); return n; @@ -1337,12 +1348,9 @@ PyParser_SimpleParseStringFlagsFilename(const char *str, const 
char *filename, int start, int flags) { - node *n; perrdetail err; - - n = PyParser_ParseStringFlagsFilename(str, filename, - &_PyParser_Grammar, - start, &err, flags); + node *n = PyParser_ParseStringFlagsFilename(str, filename, + &_PyParser_Grammar, start, &err, flags); if (n == NULL) err_input(&err); return n; @@ -1351,8 +1359,7 @@ node * PyParser_SimpleParseStringFilename(const char *str, const char *filename, int start) { - return PyParser_SimpleParseStringFlagsFilename(str, filename, - start, 0); + return PyParser_SimpleParseStringFlagsFilename(str, filename, start, 0); } /* May want to move a more generalized form of this to parsetok.c or @@ -1432,8 +1439,8 @@ } if (msg == NULL) msg = "unknown decode error"; - Py_DECREF(type); - Py_DECREF(value); + Py_XDECREF(type); + Py_XDECREF(value); Py_XDECREF(tb); break; } Modified: python/branches/ssize_t/Python/structmember.c ============================================================================== --- python/branches/ssize_t/Python/structmember.c (original) +++ python/branches/ssize_t/Python/structmember.c Mon Jan 2 16:17:17 2006 @@ -29,7 +29,7 @@ } PyObject * -PyMember_Get(char *addr, struct memberlist *mlist, char *name) +PyMember_Get(const char *addr, struct memberlist *mlist, const char *name) { struct memberlist *l; @@ -51,7 +51,7 @@ } PyObject * -PyMember_GetOne(char *addr, PyMemberDef *l) +PyMember_GetOne(const char *addr, PyMemberDef *l) { PyObject *v; if ((l->flags & READ_RESTRICTED) && @@ -134,7 +134,7 @@ } int -PyMember_Set(char *addr, struct memberlist *mlist, char *name, PyObject *v) +PyMember_Set(char *addr, struct memberlist *mlist, const char *name, PyObject *v) { struct memberlist *l; Modified: python/branches/ssize_t/Python/symtable.c ============================================================================== --- python/branches/ssize_t/Python/symtable.c (original) +++ python/branches/ssize_t/Python/symtable.c Mon Jan 2 16:17:17 2006 @@ -4,13 +4,16 @@ #include "symtable.h" #include 
"structmember.h" -/* two error strings used for warnings */ +/* error strings used for warnings */ #define GLOBAL_AFTER_ASSIGN \ "name '%.400s' is assigned to before global declaration" #define GLOBAL_AFTER_USE \ "name '%.400s' is used prior to global declaration" +#define IMPORT_STAR_WARNING "import * only allowed at module level" + + PySTEntryObject * PySTEntry_New(struct symtable *st, identifier name, _Py_block_ty block, void *key, int lineno) @@ -152,7 +155,7 @@ }; static int symtable_analyze(struct symtable *st); -static int symtable_warn(struct symtable *st, char *msg); +static int symtable_warn(struct symtable *st, char *msg, int lineno); static int symtable_enter_block(struct symtable *st, identifier name, _Py_block_ty block, void *ast, int lineno); static int symtable_exit_block(struct symtable *st, void *ast); @@ -686,10 +689,10 @@ static int -symtable_warn(struct symtable *st, char *msg) +symtable_warn(struct symtable *st, char *msg, int lineno) { if (PyErr_WarnExplicit(PyExc_SyntaxWarning, msg, st->st_filename, - st->st_cur->ste_lineno, NULL, NULL) < 0) { + lineno, NULL, NULL) < 0) { if (PyErr_ExceptionMatches(PyExc_SyntaxWarning)) { PyErr_SetString(PyExc_SyntaxError, msg); PyErr_SyntaxLocation(st->st_filename, @@ -1028,7 +1031,7 @@ PyOS_snprintf(buf, sizeof(buf), GLOBAL_AFTER_USE, c_name); - if (!symtable_warn(st, buf)) + if (!symtable_warn(st, buf, s->lineno)) return 0; } if (!symtable_add_def(st, name, DEF_GLOBAL)) @@ -1277,8 +1280,8 @@ } else { if (st->st_cur->ste_type != ModuleBlock) { - if (!symtable_warn(st, - "import * only allowed at module level")) { + int lineno = st->st_cur->ste_lineno; + if (!symtable_warn(st, IMPORT_STAR_WARNING, lineno)) { Py_DECREF(store_name); return 0; } Modified: python/branches/ssize_t/Python/sysmodule.c ============================================================================== --- python/branches/ssize_t/Python/sysmodule.c (original) +++ python/branches/ssize_t/Python/sysmodule.c Mon Jan 2 16:17:17 2006 @@ 
-1003,6 +1003,9 @@ PyDict_SetItemString(sysdict, "hexversion", v = PyInt_FromLong(PY_VERSION_HEX)); Py_XDECREF(v); + PyDict_SetItemString(sysdict, "build_number", + v = PyString_FromString(Py_GetBuildNumber())); + Py_XDECREF(v); /* * These release level checks are mutually exclusive and cover * the field, so don't get too fancy with the pre-processor! Modified: python/branches/ssize_t/Tools/bgen/bgen/bgenGenerator.py ============================================================================== --- python/branches/ssize_t/Tools/bgen/bgen/bgenGenerator.py (original) +++ python/branches/ssize_t/Tools/bgen/bgen/bgenGenerator.py Mon Jan 2 16:17:17 2006 @@ -218,10 +218,10 @@ def beginallowthreads(self): pass - + def endallowthreads(self): pass - + def callit(self): args = "" s = "%s%s(" % (self.getrvforcallit(), self.callname) Modified: python/branches/ssize_t/Tools/bgen/bgen/bgenHeapBuffer.py ============================================================================== --- python/branches/ssize_t/Tools/bgen/bgen/bgenHeapBuffer.py (original) +++ python/branches/ssize_t/Tools/bgen/bgen/bgenHeapBuffer.py Mon Jan 2 16:17:17 2006 @@ -111,10 +111,10 @@ def passOutput(self, name): return "%s__out__, %s__len__, &%s__len__" % (name, name, name) - + class MallocHeapOutputBufferType(HeapOutputBufferType): """Output buffer allocated by the called function -- passed as (&buffer, &size). - + Instantiate without parameters. Call from Python without parameters. 
""" @@ -124,7 +124,7 @@ def getAuxDeclarations(self, name): return [] - + def passOutput(self, name): return "&%s__out__, &%s__len__" % (name, name) @@ -133,10 +133,9 @@ def getargsArgs(self, name): return None - + def mkvalueFormat(self): return "z#" - + def cleanup(self, name): Output("if( %s__out__ ) free(%s__out__);", name, name) - Modified: python/branches/ssize_t/Tools/compiler/dumppyc.py ============================================================================== --- python/branches/ssize_t/Tools/compiler/dumppyc.py (original) +++ python/branches/ssize_t/Tools/compiler/dumppyc.py Mon Jan 2 16:17:17 2006 @@ -43,5 +43,5 @@ buf = open(filename).read() co = compile(buf, filename, "exec") walk(co) - else: + else: load(filename, codename) Modified: python/branches/ssize_t/Tools/pynche/StripViewer.py ============================================================================== --- python/branches/ssize_t/Tools/pynche/StripViewer.py (original) +++ python/branches/ssize_t/Tools/pynche/StripViewer.py Mon Jan 2 16:17:17 2006 @@ -151,9 +151,9 @@ return arrow, text def _x(self): - coords = self._canvas.bbox(self._TAG) + coords = self._canvas.coords(self._TAG) assert coords - return coords[2] - 6 # BAW: kludge + return coords[0] + self._ARROWWIDTH Modified: python/branches/ssize_t/Tools/unicode/gencodec.py ============================================================================== --- python/branches/ssize_t/Tools/unicode/gencodec.py (original) +++ python/branches/ssize_t/Tools/unicode/gencodec.py Mon Jan 2 16:17:17 2006 @@ -295,8 +295,8 @@ else: l.append('''\ return codecs.charmap_decode(input,errors,decoding_map)''') - - l.append(''' + + l.append(''' class StreamWriter(Codec,codecs.StreamWriter): pass @@ -330,7 +330,7 @@ # Final new-line l.append('\n') - + return '\n'.join(l) def pymap(name,map,pyfile,comments=1): Modified: python/branches/ssize_t/configure ============================================================================== --- 
python/branches/ssize_t/configure (original) +++ python/branches/ssize_t/configure Mon Jan 2 16:17:17 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41695 . +# From configure.in Revision: 41731 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.5. # @@ -10891,6 +10891,7 @@ case $ac_sys_system/$ac_sys_release in SunOS*) if test "$GCC" = yes; then CCSHARED="-fPIC"; + else CCSHARED="-xcode=pic32"; fi;; hp*|HP*) if test "$GCC" = yes; then CCSHARED="-fPIC"; @@ -21293,7 +21294,7 @@ done -SRCDIRS="Parser Grammar Objects Python Modules Mac Mac/Python" +SRCDIRS="Parser Grammar Objects Python Modules Mac" echo "$as_me:$LINENO: checking for build directories" >&5 echo $ECHO_N "checking for build directories... $ECHO_C" >&6 for dir in $SRCDIRS; do Modified: python/branches/ssize_t/configure.in ============================================================================== --- python/branches/ssize_t/configure.in (original) +++ python/branches/ssize_t/configure.in Mon Jan 2 16:17:17 2006 @@ -1413,6 +1413,7 @@ case $ac_sys_system/$ac_sys_release in SunOS*) if test "$GCC" = yes; then CCSHARED="-fPIC"; + else CCSHARED="-xcode=pic32"; fi;; hp*|HP*) if test "$GCC" = yes; then CCSHARED="-fPIC"; @@ -3149,7 +3150,7 @@ done AC_SUBST(SRCDIRS) -SRCDIRS="Parser Grammar Objects Python Modules Mac Mac/Python" +SRCDIRS="Parser Grammar Objects Python Modules Mac" AC_MSG_CHECKING(for build directories) for dir in $SRCDIRS; do if test ! -d $dir; then Modified: python/branches/ssize_t/setup.py ============================================================================== --- python/branches/ssize_t/setup.py (original) +++ python/branches/ssize_t/setup.py Mon Jan 2 16:17:17 2006 @@ -148,16 +148,17 @@ self.extensions.remove(ext) if platform != 'mac': - # Parse Modules/Setup to figure out which modules are turned - # on in the file. 
- input = text_file.TextFile('Modules/Setup', join_lines=1) + # Parse Modules/Setup and Modules/Setup.local to figure out which + # modules are turned on in the file. remove_modules = [] - while 1: - line = input.readline() - if not line: break - line = line.split() - remove_modules.append( line[0] ) - input.close() + for filename in ('Modules/Setup', 'Modules/Setup.local'): + input = text_file.TextFile(filename, join_lines=1) + while 1: + line = input.readline() + if not line: break + line = line.split() + remove_modules.append(line[0]) + input.close() for ext in self.extensions[:]: if ext.name in remove_modules: @@ -999,7 +1000,7 @@ join(os.getenv('HOME'), '/Library/Frameworks') ] - # Find the directory that contains the Tcl.framwork and Tk.framework + # Find the directory that contains the Tcl.framework and Tk.framework # bundles. # XXX distutils should support -F! for F in framework_dirs: @@ -1050,8 +1051,8 @@ # AquaTk is a separate method. Only one Tkinter will be built on # Darwin - either AquaTk, if it is found, or X11 based Tk. platform = self.get_platform() - if platform == 'darwin' and \ - self.detect_tkinter_darwin(inc_dirs, lib_dirs): + if (platform == 'darwin' and + self.detect_tkinter_darwin(inc_dirs, lib_dirs)): return # Assume we haven't found any of the libraries or include files From python-checkins at python.org Mon Jan 2 16:22:55 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 16:22:55 +0100 (CET) Subject: [Python-checkins] commit of r41869 - python/branches/ssize_t/Python/Python-ast.c Message-ID: <20060102152255.73A0A1E4002@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 16:22:55 2006 New Revision: 41869 Modified: python/branches/ssize_t/Python/Python-ast.c Log: Regenerated. 
Modified: python/branches/ssize_t/Python/Python-ast.c ============================================================================== --- python/branches/ssize_t/Python/Python-ast.c (original) +++ python/branches/ssize_t/Python/Python-ast.c Mon Jan 2 16:22:55 2006 @@ -1098,70 +1098,70 @@ #define CHECKSIZE(BUF, OFF, MIN) { \ - int need = *(OFF) + MIN; \ - if (need >= PyString_GET_SIZE(*(BUF))) { \ - int newsize = PyString_GET_SIZE(*(BUF)) * 2; \ - if (newsize < need) \ - newsize = need; \ - if (_PyString_Resize((BUF), newsize) < 0) \ - return 0; \ - } \ -} + int need = *(OFF) + MIN; \ + if (need >= PyString_GET_SIZE(*(BUF))) { \ + int newsize = PyString_GET_SIZE(*(BUF)) * 2; \ + if (newsize < need) \ + newsize = need; \ + if (_PyString_Resize((BUF), newsize) < 0) \ + return 0; \ + } \ +} -static int +static int marshal_write_int(PyObject **buf, int *offset, int x) { - char *s; + char *s; - CHECKSIZE(buf, offset, 4) - s = PyString_AS_STRING(*buf) + (*offset); - s[0] = (x & 0xff); - s[1] = (x >> 8) & 0xff; - s[2] = (x >> 16) & 0xff; - s[3] = (x >> 24) & 0xff; - *offset += 4; - return 1; + CHECKSIZE(buf, offset, 4) + s = PyString_AS_STRING(*buf) + (*offset); + s[0] = (x & 0xff); + s[1] = (x >> 8) & 0xff; + s[2] = (x >> 16) & 0xff; + s[3] = (x >> 24) & 0xff; + *offset += 4; + return 1; } -static int +static int marshal_write_bool(PyObject **buf, int *offset, bool b) { - if (b) - marshal_write_int(buf, offset, 1); - else - marshal_write_int(buf, offset, 0); - return 1; + if (b) + marshal_write_int(buf, offset, 1); + else + marshal_write_int(buf, offset, 0); + return 1; } -static int +static int marshal_write_identifier(PyObject **buf, int *offset, identifier id) { - int l = PyString_GET_SIZE(id); - marshal_write_int(buf, offset, l); - CHECKSIZE(buf, offset, l); - memcpy(PyString_AS_STRING(*buf) + *offset, - PyString_AS_STRING(id), l); - *offset += l; - return 1; + int l = PyString_GET_SIZE(id); + marshal_write_int(buf, offset, l); + CHECKSIZE(buf, offset, l); + 
memcpy(PyString_AS_STRING(*buf) + *offset, + PyString_AS_STRING(id), l); + *offset += l; + return 1; } -static int +static int marshal_write_string(PyObject **buf, int *offset, string s) { - int len = PyString_GET_SIZE(s); - marshal_write_int(buf, offset, len); - CHECKSIZE(buf, offset, len); - memcpy(PyString_AS_STRING(*buf) + *offset, - PyString_AS_STRING(s), len); - *offset += len; - return 1; + int len = PyString_GET_SIZE(s); + marshal_write_int(buf, offset, len); + CHECKSIZE(buf, offset, len); + memcpy(PyString_AS_STRING(*buf) + *offset, + PyString_AS_STRING(s), len); + *offset += len; + return 1; } -static int +static int marshal_write_object(PyObject **buf, int *offset, object s) { - /* XXX */ - return 0; + /* XXX */ + return 0; } From python-checkins at python.org Mon Jan 2 17:10:11 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 17:10:11 +0100 (CET) Subject: [Python-checkins] commit of r41870 - python/branches/ssize_t/Objects/listobject.c Message-ID: <20060102161011.A5EF41E4002@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 17:10:10 2006 New Revision: 41870 Modified: python/branches/ssize_t/Objects/listobject.c Log: Correct format character for .index and .pop. 
Modified: python/branches/ssize_t/Objects/listobject.c ============================================================================== --- python/branches/ssize_t/Objects/listobject.c (original) +++ python/branches/ssize_t/Objects/listobject.c Mon Jan 2 17:10:10 2006 @@ -708,7 +708,7 @@ { Py_ssize_t i; PyObject *v; - if (!PyArg_ParseTuple(args, "iO:insert", &i, &v)) + if (!PyArg_ParseTuple(args, "nO:insert", &i, &v)) return NULL; if (ins1(self, i, v) == 0) Py_RETURN_NONE; @@ -868,7 +868,7 @@ if (arg != NULL) { if (PyInt_Check(arg)) i = PyInt_AS_LONG((PyIntObject*) arg); - else if (!PyArg_ParseTuple(args, "|i:pop", &i)) + else if (!PyArg_ParseTuple(args, "|n:pop", &i)) return NULL; } if (self->ob_size == 0) { From python-checkins at python.org Mon Jan 2 17:35:05 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 17:35:05 +0100 (CET) Subject: [Python-checkins] commit of r41871 - in python/branches/ssize_t: Include/cStringIO.h Modules/cStringIO.c Message-ID: <20060102163505.B5DAD1E4010@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 17:35:03 2006 New Revision: 41871 Modified: python/branches/ssize_t/Include/cStringIO.h python/branches/ssize_t/Modules/cStringIO.c Log: Fix cStringIO ssize_t bugs. Modified: python/branches/ssize_t/Include/cStringIO.h ============================================================================== --- python/branches/ssize_t/Include/cStringIO.h (original) +++ python/branches/ssize_t/Include/cStringIO.h Mon Jan 2 17:35:03 2006 @@ -29,7 +29,7 @@ /* Read a string from an input object. If the last argument is -1, the remainder will be read. */ - int(*cread)(PyObject *, char **, int); + int(*cread)(PyObject *, char **, Py_ssize_t); /* Read a line from an input object. 
Returns the length of the read line as an int and a pointer inside the object buffer as char** (so Modified: python/branches/ssize_t/Modules/cStringIO.c ============================================================================== --- python/branches/ssize_t/Modules/cStringIO.c (original) +++ python/branches/ssize_t/Modules/cStringIO.c Mon Jan 2 17:35:03 2006 @@ -47,7 +47,7 @@ typedef struct { PyObject_HEAD char *buf; - int pos, string_size; + Py_ssize_t pos, string_size; } IOobject; #define IOOOBJECT(O) ((IOobject*)(O)) @@ -68,7 +68,7 @@ typedef struct { /* Subtype of IOobject */ PyObject_HEAD char *buf; - int pos, string_size; + Py_ssize_t pos, string_size; /* We store a reference to the object here in order to keep the buffer alive during the lifetime of the Iobject. */ PyObject *pbuf; @@ -155,7 +155,7 @@ "read([s]) -- Read s characters, or the rest of the string"); static int -IO_cread(PyObject *self, char **output, int n) { +IO_cread(PyObject *self, char **output, Py_ssize_t n) { int l; UNLESS (IO__opencheck(IOOOBJECT(self))) return -1; @@ -172,10 +172,10 @@ static PyObject * IO_read(IOobject *self, PyObject *args) { - int n = -1; + Py_ssize_t n = -1; char *output; - UNLESS (PyArg_ParseTuple(args, "|i:read", &n)) return NULL; + UNLESS (PyArg_ParseTuple(args, "|n:read", &n)) return NULL; if ( (n=IO_cread((PyObject*)self,&output,n)) < 0) return NULL; @@ -287,10 +287,10 @@ static PyObject * IO_truncate(IOobject *self, PyObject *args) { - int pos = -1; + Py_ssize_t pos = -1; UNLESS (IO__opencheck(self)) return NULL; - UNLESS (PyArg_ParseTuple(args, "|i:truncate", &pos)) return NULL; + UNLESS (PyArg_ParseTuple(args, "|n:truncate", &pos)) return NULL; if (pos < 0) pos = self->pos; if (self->string_size > pos) self->string_size = pos; @@ -326,14 +326,14 @@ static PyObject * O_seek(Oobject *self, PyObject *args) { - int i_position; Py_ssize_t position; int mode = 0; UNLESS (IO__opencheck(IOOOBJECT(self))) return NULL; - UNLESS (PyArg_ParseTuple(args, "i|i:seek", 
&i_position, &mode)) + UNLESS (PyArg_ParseTuple(args, "n|i:seek", &position, &mode)) return NULL; - position = i_position; + + printf("Seeking to %d\n",(int)position); if (mode == 2) { position += self->string_size; @@ -572,10 +572,11 @@ static PyObject * I_seek(Iobject *self, PyObject *args) { - int position, mode = 0; + Py_ssize_t position; + int mode = 0; UNLESS (IO__opencheck(IOOOBJECT(self))) return NULL; - UNLESS (PyArg_ParseTuple(args, "i|i:seek", &position, &mode)) + UNLESS (PyArg_ParseTuple(args, "n|i:seek", &position, &mode)) return NULL; if (mode == 2) position += self->string_size; From python-checkins at python.org Mon Jan 2 17:35:52 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 17:35:52 +0100 (CET) Subject: [Python-checkins] commit of r41872 - python/branches/ssize_t/Modules/cStringIO.c Message-ID: <20060102163552.AD94A1E4011@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 17:35:52 2006 New Revision: 41872 Modified: python/branches/ssize_t/Modules/cStringIO.c Log: Revert bogus print statement. 
Modified: python/branches/ssize_t/Modules/cStringIO.c ============================================================================== --- python/branches/ssize_t/Modules/cStringIO.c (original) +++ python/branches/ssize_t/Modules/cStringIO.c Mon Jan 2 17:35:52 2006 @@ -333,8 +333,6 @@ UNLESS (PyArg_ParseTuple(args, "n|i:seek", &position, &mode)) return NULL; - printf("Seeking to %d\n",(int)position); - if (mode == 2) { position += self->string_size; } From python-checkins at python.org Mon Jan 2 17:47:37 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 17:47:37 +0100 (CET) Subject: [Python-checkins] commit of r41873 - python/branches/ssize_t/Parser/parsetok.c Message-ID: <20060102164737.1B2461E4002@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 17:47:35 2006 New Revision: 41873 Modified: python/branches/ssize_t/Parser/parsetok.c Log: Initialize offset only if we have an error buffer. Modified: python/branches/ssize_t/Parser/parsetok.c ============================================================================== --- python/branches/ssize_t/Parser/parsetok.c (original) +++ python/branches/ssize_t/Parser/parsetok.c Mon Jan 2 17:47:35 2006 @@ -185,9 +185,9 @@ if (tok->lineno <= 1 && tok->done == E_EOF) err_ret->error = E_EOF; err_ret->lineno = tok->lineno; - assert(tok->cur - tok->buf < INT_MAX); - err_ret->offset = (int)(tok->cur - tok->buf); if (tok->buf != NULL) { + assert(tok->cur - tok->buf < INT_MAX); + err_ret->offset = (int)(tok->cur - tok->buf); size_t len = tok->inp - tok->buf; err_ret->text = (char *) PyObject_MALLOC(len + 1); if (err_ret->text != NULL) { From python-checkins at python.org Mon Jan 2 18:39:42 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 18:39:42 +0100 (CET) Subject: [Python-checkins] commit of r41874 - python/branches/ssize_t/Objects/fileobject.c Message-ID: <20060102173942.AA9BD1E4002@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 18:39:41 2006 New Revision: 
41874 Modified: python/branches/ssize_t/Objects/fileobject.c Log: Fix w# argument. Modified: python/branches/ssize_t/Objects/fileobject.c ============================================================================== --- python/branches/ssize_t/Objects/fileobject.c (original) +++ python/branches/ssize_t/Objects/fileobject.c Mon Jan 2 18:39:41 2006 @@ -854,7 +854,7 @@ file_readinto(PyFileObject *f, PyObject *args) { char *ptr; - int ntodo; + Py_ssize_t ntodo; Py_ssize_t ndone, nnow; if (f->f_fp == NULL) From python-checkins at python.org Mon Jan 2 18:56:04 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 18:56:04 +0100 (CET) Subject: [Python-checkins] commit of r41875 - python/branches/ssize_t/Lib/test/string_tests.py Message-ID: <20060102175604.60BD41E4013@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 18:56:03 2006 New Revision: 41875 Modified: python/branches/ssize_t/Lib/test/string_tests.py Log: Disable string multiply overflow test, because it is wrong on 64-bit systems. 
Modified: python/branches/ssize_t/Lib/test/string_tests.py ============================================================================== --- python/branches/ssize_t/Lib/test/string_tests.py (original) +++ python/branches/ssize_t/Lib/test/string_tests.py Mon Jan 2 18:56:03 2006 @@ -627,7 +627,9 @@ self.checkequal('abcabcabc', 'abc', '__mul__', 3) self.checkraises(TypeError, 'abc', '__mul__') self.checkraises(TypeError, 'abc', '__mul__', '') - self.checkraises(OverflowError, 10000*'abc', '__mul__', 2000000000) + # XXX: on a 64-bit system, this doesn't raise an overflow error, + # but either raises a MemoryError, or succeeds (if you have 54TiB) + #self.checkraises(OverflowError, 10000*'abc', '__mul__', 2000000000) def test_join(self): # join now works with any sequence type From python-checkins at python.org Mon Jan 2 19:25:03 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 19:25:03 +0100 (CET) Subject: [Python-checkins] commit of r41876 - in python/branches/ssize_t: Objects/unicodeobject.c Python/codecs.c Python/exceptions.c Message-ID: <20060102182503.3A5B41E4002@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 19:25:02 2006 New Revision: 41876 Modified: python/branches/ssize_t/Objects/unicodeobject.c python/branches/ssize_t/Python/codecs.c python/branches/ssize_t/Python/exceptions.c Log: Fix ssize_t argument parsing and building. 
Modified: python/branches/ssize_t/Objects/unicodeobject.c ============================================================================== --- python/branches/ssize_t/Objects/unicodeobject.c (original) +++ python/branches/ssize_t/Objects/unicodeobject.c Mon Jan 2 19:25:02 2006 @@ -747,13 +747,12 @@ const char *input, Py_ssize_t insize, Py_ssize_t *startinpos, Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, PyObject **output, Py_ssize_t *outpos, Py_UNICODE **outptr) { - static char *argparse = "O!i;decoding error handler must return (unicode, int) tuple"; + static char *argparse = "O!n;decoding error handler must return (unicode, int) tuple"; PyObject *restuple = NULL; PyObject *repunicode = NULL; Py_ssize_t outsize = PyUnicode_GET_SIZE(*output); Py_ssize_t requiredsize; - int inewpos; Py_ssize_t newpos; Py_UNICODE *repptr; int repsize; @@ -787,12 +786,10 @@ PyErr_Format(PyExc_TypeError, &argparse[4]); goto onError; } - if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &inewpos)) + if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos)) goto onError; - if (inewpos<0) - newpos = insize+inewpos; - else - newpos = inewpos; + if (newpos<0) + newpos = insize+newpos; if (newpos<0 || newpos>insize) { /* XXX %zd? 
*/ PyErr_Format(PyExc_IndexError, "position %d from error handler out of bounds", (int)newpos); @@ -2441,7 +2438,7 @@ Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos) { - static char *argparse = "O!i;encoding error handler must return (unicode, int) tuple"; + static char *argparse = "O!n;encoding error handler must return (unicode, int) tuple"; PyObject *restuple; PyObject *resunicode; Modified: python/branches/ssize_t/Python/codecs.c ============================================================================== --- python/branches/ssize_t/Python/codecs.c (original) +++ python/branches/ssize_t/Python/codecs.c Mon Jan 2 19:25:02 2006 @@ -478,7 +478,7 @@ return NULL; } /* ouch: passing NULL, 0, pos gives None instead of u'' */ - return Py_BuildValue("(u#i)", &end, 0, end); + return Py_BuildValue("(u#n)", &end, 0, end); } @@ -502,7 +502,7 @@ for (p = PyUnicode_AS_UNICODE(res), i = start; i Author: neal.norwitz Date: Mon Jan 2 21:07:16 2006 New Revision: 41877 Added: python/trunk/Misc/build.sh (contents, props changed) Log: Script to automatically build and test python and doc. The results are copied up to docs.python.org/dev Needs lots more work. Feel free to add code, fixes, or FIXME comments. Added: python/trunk/Misc/build.sh ============================================================================== --- (empty file) +++ python/trunk/Misc/build.sh Mon Jan 2 21:07:16 2006 @@ -0,0 +1,192 @@ +#!/bin/sh + +## Script to build and test the latest python from svn. It basically +## does this: +## svn up ; ./configure ; make ; make test ; make install ; cd Doc ; make +## +## Logs are kept and rsync'ed to the host. If there are test failure(s), +## information about the failure(s) is mailed. +## +## This script is run on the PSF's machine as user neal via crontab. +## +## Yes, this script would probably be easier in python, but then +## there's a bootstrap problem. What if Python doesn't build? 
+## +## This script should be fairly clean Bourne shell, ie not too many +## bash-isms. We should try to keep it portable to other Unixes. +## Even though it will probably only run on Linux. I'm sure there are +## several GNU-isms currently (date +%s and readlink). +## +## Perhaps this script should be broken up into 2 (or more) components. +## Building doc is orthogonal to the rest of the python build/test. +## + +## FIXME: we should detect test hangs (eg, if they take more than 45 minutes) + +## FIXME: we should run valgrind +## FIXME: we should run code coverage + +## Utilities invoked in this script include: +## date, dirname, expr, grep, readlink, uname +## cksum, make, mutt, rsync, svn + +## need to get svn from ~/local/bin +PATH=$PATH:$HOME/local/bin + +## remember where did we started from +DIR=`dirname $0` +if [ "$DIR" = "" ]; then + DIR="." +fi + +## make directory absolute +DIR=`readlink -f $DIR` +FULLPATHNAME="$DIR/$0" +## we want Misc/.. +DIR=`dirname $DIR` + +## Configurable options + +FAILURE_SUBJECT="Python Regression Test Failures" +#FAILURE_MAILTO="python-checkins at python.org" +#FAILURE_MAILTO="YOUR_ACCOUNT at gmail.com" +FAILURE_MAILTO="nnorwitz at gmail.com" + +REMOTE_SYSTEM="neal at dinsdale.python.org" +REMOTE_DIR="/data/ftp.python.org/pub/docs.python.org/dev/" +RESULT_FILE="$DIR/build/index.html" +INSTALL_DIR="/tmp/python-test/local" +RSYNC_OPTS="-aC -e ssh" + +REFLOG="build/reflog.txt.out" + +## utility functions +current_time() { + date +%s +} + +update_status() { + now=`current_time` + time=`expr $now - $3` + echo "
  • $1 ($time seconds)
  • " >> $RESULT_FILE +} + +mail_on_failure() { + if [ "$NUM_FAILURES" != "0" ]; then + mutt -s "$FAILURE_SUBJECT $1 ($NUM_FAILURES)" $FAILURE_MAILTO < $2 + fi +} + +## setup +cd $DIR +mkdir -p build +rm -f $RESULT_FILE build/*.out +rm -rf $INSTALL_DIR + +## create results file +TITLE="Automated Python Build Results" +echo "$TITLE" >> $RESULT_FILE +echo "" >> $RESULT_FILE +echo "

    Automated Python Build Results

    " >> $RESULT_FILE +echo "" >> $RESULT_FILE +echo " " >> $RESULT_FILE +echo " " >> $RESULT_FILE +echo " " >> $RESULT_FILE +echo " " >> $RESULT_FILE +echo " " >> $RESULT_FILE +echo " " >> $RESULT_FILE +echo " " >> $RESULT_FILE +echo "
    Built on:`date`
    Hostname:`uname -n`
    Platform:`uname -srmpo`
    " >> $RESULT_FILE +echo "
      " >> $RESULT_FILE + +## update, build, and test +ORIG_CHECKSUM=`cksum $FULLPATHNAME` +F=svn-update.out +start=`current_time` +svn update >& build/$F +err=$? +update_status "Updating" "$F" $start +if [ $err = 0 ]; then + ## FIXME: we should check if this file has changed. + ## If it has changed, we should re-run the script to pick up changes. + if [ "$ORIG_CHECKSUM" != "$ORIG_CHECKSUM" ]; then + exec $FULLPATHNAME $@ + fi + + F=svn-stat.out + start=`current_time` + svn stat >& build/$F + ## ignore some of the diffs + NUM_DIFFS=`egrep -vc '^. (@test|db_home|Lib/test/(regrtest\.py|db_home))$' build/$F` + update_status "svn stat ($NUM_DIFFS possibly important diffs)" "$F" $start + + F=configure.out + start=`current_time` + ./configure --prefix=$INSTALL_DIR --with-pydebug >& build/$F + err=$? + update_status "Configuring" "$F" $start + if [ $err = 0 ]; then + F=make.out + start=`current_time` + make >& build/$F + err=$? + warnings=`grep warning build/$F | egrep -vc "te?mpnam(_r|)' is dangerous,"` + update_status "Building ($warnings warnings)" "$F" $start + if [ $err = 0 ]; then + ## make install + F=make-install.out + start=`current_time` + make install >& build/$F + update_status "Installing" "$F" $start + + ## make and run basic tests + F=make-test.out + start=`current_time` + make test >& build/$F + NUM_FAILURES=`grep -ic fail build/$F` + update_status "Testing basics ($NUM_FAILURES failures)" "$F" $start + mail_on_failure "basics" buiild/$F + + ## run the tests looking for leaks + F=make-test-refleak.out + start=`current_time` + ./python ./Lib/test/regrtest.py -R 4:3:$REFLOG >& build/$F + NUM_FAILURES=`grep -ic leak $REFLOG` + update_status "Testing refleaks ($NUM_FAILURES failures)" "$F" $start + mail_on_failure "refleak" $REFLOG + + ## now try to run all the tests + F=make-testall.out + start=`current_time` + ## skip curses when running from cron since there's no terminal + ## skip sound since it's not setup on the PSF box (/dev/dsp) + ./python -E -tt 
./Lib/test/regrtest.py -uall -x test_curses,test_linuxaudiodev,test_ossaudiodev >& build/$F + NUM_FAILURES=`grep -ic fail build/$F` + update_status "Testing all except curses and sound ($NUM_FAILURES failures)" "$F" $start + mail_on_failure "all" buiild/$F + fi + fi +fi + + +## make doc +cd Doc +F="make-doc.out" +start=`current_time` +make >& ../build/$F +err=$? +update_status "Making doc" "$F" $start +if [ $err != 0 ]; then + NUM_FAILURES=1 + mail_on_failure "doc" ../build/$F +fi + +echo "
    " >> $RESULT_FILE +echo "" >> $RESULT_FILE +echo "" >> $RESULT_FILE + +## copy results +rsync $RSYNC_OPTS html/ $REMOTE_SYSTEM:$REMOTE_DIR +cd ../build +rsync $RSYNC_OPTS index.html *.out $REMOTE_SYSTEM:$REMOTE_DIR/results/ + From python-checkins at python.org Mon Jan 2 22:15:05 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 22:15:05 +0100 (CET) Subject: [Python-checkins] commit of r41878 - python/trunk/Python/compile.c Message-ID: <20060102211505.EE2251E4013@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 22:15:05 2006 New Revision: 41878 Modified: python/trunk/Python/compile.c Log: Adjust usage of compiler_use_new_block to its return type. Modified: python/trunk/Python/compile.c ============================================================================== --- python/trunk/Python/compile.c (original) +++ python/trunk/Python/compile.c Mon Jan 2 22:15:05 2006 @@ -1124,7 +1124,7 @@ c->u = u; c->c_nestlevel++; - if (compiler_use_new_block(c) < 0) + if (compiler_use_new_block(c) == NULL) return 0; return 1; @@ -2899,7 +2899,7 @@ else jumpi = JUMP_IF_TRUE; end = compiler_new_block(c); - if (end < 0) + if (end == NULL) return 0; s = e->v.BoolOp.values; n = asdl_seq_LEN(s) - 1; From python-checkins at python.org Mon Jan 2 22:15:15 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 22:15:15 +0100 (CET) Subject: [Python-checkins] commit of r41879 - python/branches/ssize_t/Python/compile.c Message-ID: <20060102211515.9613F1E4020@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 22:15:15 2006 New Revision: 41879 Modified: python/branches/ssize_t/Python/compile.c Log: Adjust usage of compiler_use_new_block to its return type. 
Modified: python/branches/ssize_t/Python/compile.c ============================================================================== --- python/branches/ssize_t/Python/compile.c (original) +++ python/branches/ssize_t/Python/compile.c Mon Jan 2 22:15:15 2006 @@ -1124,7 +1124,7 @@ c->u = u; c->c_nestlevel++; - if (compiler_use_new_block(c) < 0) + if (compiler_use_new_block(c) == NULL) return 0; return 1; @@ -2899,7 +2899,7 @@ else jumpi = JUMP_IF_TRUE; end = compiler_new_block(c); - if (end < 0) + if (end == NULL) return 0; s = e->v.BoolOp.values; n = asdl_seq_LEN(s) - 1; From python-checkins at python.org Mon Jan 2 22:18:56 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 2 Jan 2006 22:18:56 +0100 (CET) Subject: [Python-checkins] commit of r41880 - python/trunk/Python/Python-ast.c Message-ID: <20060102211856.1D1F61E4002@bag.python.org> Author: martin.v.loewis Date: Mon Jan 2 22:18:55 2006 New Revision: 41880 Modified: python/trunk/Python/Python-ast.c Log: Regenerate. Modified: python/trunk/Python/Python-ast.c ============================================================================== --- python/trunk/Python/Python-ast.c (original) +++ python/trunk/Python/Python-ast.c Mon Jan 2 22:18:55 2006 @@ -1098,70 +1098,70 @@ #define CHECKSIZE(BUF, OFF, MIN) { \ - int need = *(OFF) + MIN; \ - if (need >= PyString_GET_SIZE(*(BUF))) { \ - int newsize = PyString_GET_SIZE(*(BUF)) * 2; \ - if (newsize < need) \ - newsize = need; \ - if (_PyString_Resize((BUF), newsize) < 0) \ - return 0; \ - } \ -} + int need = *(OFF) + MIN; \ + if (need >= PyString_GET_SIZE(*(BUF))) { \ + int newsize = PyString_GET_SIZE(*(BUF)) * 2; \ + if (newsize < need) \ + newsize = need; \ + if (_PyString_Resize((BUF), newsize) < 0) \ + return 0; \ + } \ +} -static int +static int marshal_write_int(PyObject **buf, int *offset, int x) { - char *s; + char *s; - CHECKSIZE(buf, offset, 4) - s = PyString_AS_STRING(*buf) + (*offset); - s[0] = (x & 0xff); - s[1] = (x >> 8) & 0xff; - s[2] = (x >> 
16) & 0xff; - s[3] = (x >> 24) & 0xff; - *offset += 4; - return 1; + CHECKSIZE(buf, offset, 4) + s = PyString_AS_STRING(*buf) + (*offset); + s[0] = (x & 0xff); + s[1] = (x >> 8) & 0xff; + s[2] = (x >> 16) & 0xff; + s[3] = (x >> 24) & 0xff; + *offset += 4; + return 1; } -static int +static int marshal_write_bool(PyObject **buf, int *offset, bool b) { - if (b) - marshal_write_int(buf, offset, 1); - else - marshal_write_int(buf, offset, 0); - return 1; + if (b) + marshal_write_int(buf, offset, 1); + else + marshal_write_int(buf, offset, 0); + return 1; } -static int +static int marshal_write_identifier(PyObject **buf, int *offset, identifier id) { - int l = PyString_GET_SIZE(id); - marshal_write_int(buf, offset, l); - CHECKSIZE(buf, offset, l); - memcpy(PyString_AS_STRING(*buf) + *offset, - PyString_AS_STRING(id), l); - *offset += l; - return 1; + int l = PyString_GET_SIZE(id); + marshal_write_int(buf, offset, l); + CHECKSIZE(buf, offset, l); + memcpy(PyString_AS_STRING(*buf) + *offset, + PyString_AS_STRING(id), l); + *offset += l; + return 1; } -static int +static int marshal_write_string(PyObject **buf, int *offset, string s) { - int len = PyString_GET_SIZE(s); - marshal_write_int(buf, offset, len); - CHECKSIZE(buf, offset, len); - memcpy(PyString_AS_STRING(*buf) + *offset, - PyString_AS_STRING(s), len); - *offset += len; - return 1; + int len = PyString_GET_SIZE(s); + marshal_write_int(buf, offset, len); + CHECKSIZE(buf, offset, len); + memcpy(PyString_AS_STRING(*buf) + *offset, + PyString_AS_STRING(s), len); + *offset += len; + return 1; } -static int +static int marshal_write_object(PyObject **buf, int *offset, object s) { - /* XXX */ - return 0; + /* XXX */ + return 0; } From python-checkins at python.org Mon Jan 2 23:07:07 2006 From: python-checkins at python.org (reinhold.birkenfeld) Date: Mon, 2 Jan 2006 23:07:07 +0100 (CET) Subject: [Python-checkins] commit of r41881 - python/trunk/Doc/lib/libos.tex Message-ID: <20060102220707.979B41E4002@bag.python.org> 
Author: reinhold.birkenfeld Date: Mon Jan 2 23:07:06 2006 New Revision: 41881 Modified: python/trunk/Doc/lib/libos.tex Log: Bug #1395597: doc typo Modified: python/trunk/Doc/lib/libos.tex ============================================================================== --- python/trunk/Doc/lib/libos.tex (original) +++ python/trunk/Doc/lib/libos.tex Mon Jan 2 23:07:06 2006 @@ -832,9 +832,9 @@ doesn't open the FIFO --- it just creates the rendezvous point. \end{funcdesc} -\begin{funcdesc}{mknod}{path\optional{, mode=0600, device}} +\begin{funcdesc}{mknod}{filename\optional{, mode=0600, device}} Create a filesystem node (file, device special file or named pipe) -named filename. \var{mode} specifies both the permissions to use and +named \var{filename}. \var{mode} specifies both the permissions to use and the type of node to be created, being combined (bitwise OR) with one of S_IFREG, S_IFCHR, S_IFBLK, and S_IFIFO (those constants are available in \module{stat}). For S_IFCHR and S_IFBLK, \var{device} From python-checkins at python.org Mon Jan 2 23:07:25 2006 From: python-checkins at python.org (reinhold.birkenfeld) Date: Mon, 2 Jan 2006 23:07:25 +0100 (CET) Subject: [Python-checkins] commit of r41882 - python/branches/release24-maint/Doc/lib/libos.tex Message-ID: <20060102220725.EE46E1E4002@bag.python.org> Author: reinhold.birkenfeld Date: Mon Jan 2 23:07:25 2006 New Revision: 41882 Modified: python/branches/release24-maint/Doc/lib/libos.tex Log: bug #1395597: doc typo Modified: python/branches/release24-maint/Doc/lib/libos.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libos.tex (original) +++ python/branches/release24-maint/Doc/lib/libos.tex Mon Jan 2 23:07:25 2006 @@ -812,9 +812,9 @@ doesn't open the FIFO --- it just creates the rendezvous point. 
\end{funcdesc} -\begin{funcdesc}{mknod}{path\optional{, mode=0600, device}} +\begin{funcdesc}{mknod}{filename\optional{, mode=0600, device}} Create a filesystem node (file, device special file or named pipe) -named filename. \var{mode} specifies both the permissions to use and +named \var{filename}. \var{mode} specifies both the permissions to use and the type of node to be created, being combined (bitwise OR) with one of S_IFREG, S_IFCHR, S_IFBLK, and S_IFIFO (those constants are available in \module{stat}). For S_IFCHR and S_IFBLK, \var{device} From python-checkins at python.org Mon Jan 2 23:10:11 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 2 Jan 2006 23:10:11 +0100 (CET) Subject: [Python-checkins] commit of r41883 - python/trunk/Misc/build.sh Message-ID: <20060102221011.307C31E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 2 23:10:10 2006 New Revision: 41883 Modified: python/trunk/Misc/build.sh Log: * Change tabs to spaces * Enable network tests when hunting for leaks * Disable curses and audio tests properly when running -u all Modified: python/trunk/Misc/build.sh ============================================================================== --- python/trunk/Misc/build.sh (original) +++ python/trunk/Misc/build.sh Mon Jan 2 23:10:10 2006 @@ -133,13 +133,13 @@ warnings=`grep warning build/$F | egrep -vc "te?mpnam(_r|)' is dangerous,"` update_status "Building ($warnings warnings)" "$F" $start if [ $err = 0 ]; then - ## make install + ## make install F=make-install.out start=`current_time` make install >& build/$F update_status "Installing" "$F" $start - ## make and run basic tests + ## make and run basic tests F=make-test.out start=`current_time` make test >& build/$F @@ -147,20 +147,20 @@ update_status "Testing basics ($NUM_FAILURES failures)" "$F" $start mail_on_failure "basics" buiild/$F - ## run the tests looking for leaks + ## run the tests looking for leaks F=make-test-refleak.out start=`current_time` - ./python 
./Lib/test/regrtest.py -R 4:3:$REFLOG >& build/$F + ./python ./Lib/test/regrtest.py -R 4:3:$REFLOG -u network >& build/$F NUM_FAILURES=`grep -ic leak $REFLOG` update_status "Testing refleaks ($NUM_FAILURES failures)" "$F" $start mail_on_failure "refleak" $REFLOG - ## now try to run all the tests + ## now try to run all the tests F=make-testall.out start=`current_time` - ## skip curses when running from cron since there's no terminal - ## skip sound since it's not setup on the PSF box (/dev/dsp) - ./python -E -tt ./Lib/test/regrtest.py -uall -x test_curses,test_linuxaudiodev,test_ossaudiodev >& build/$F + ## skip curses when running from cron since there's no terminal + ## skip sound since it's not setup on the PSF box (/dev/dsp) + ./python -E -tt ./Lib/test/regrtest.py -uall -x test_curses test_linuxaudiodev test_ossaudiodev >& build/$F NUM_FAILURES=`grep -ic fail build/$F` update_status "Testing all except curses and sound ($NUM_FAILURES failures)" "$F" $start mail_on_failure "all" buiild/$F From python-checkins at python.org Tue Jan 3 00:22:41 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 3 Jan 2006 00:22:41 +0100 (CET) Subject: [Python-checkins] commit of r41884 - python/trunk/Misc/build.sh Message-ID: <20060102232241.9BD331E4009@bag.python.org> Author: neal.norwitz Date: Tue Jan 3 00:22:41 2006 New Revision: 41884 Modified: python/trunk/Misc/build.sh Log: get the proper full pathname of the script. 
spell build properly Modified: python/trunk/Misc/build.sh ============================================================================== --- python/trunk/Misc/build.sh (original) +++ python/trunk/Misc/build.sh Tue Jan 3 00:22:41 2006 @@ -27,7 +27,7 @@ ## FIXME: we should run code coverage ## Utilities invoked in this script include: -## date, dirname, expr, grep, readlink, uname +## basename, date, dirname, expr, grep, readlink, uname ## cksum, make, mutt, rsync, svn ## need to get svn from ~/local/bin @@ -41,7 +41,7 @@ ## make directory absolute DIR=`readlink -f $DIR` -FULLPATHNAME="$DIR/$0" +FULLPATHNAME="$DIR/`basename $0`" ## we want Misc/.. DIR=`dirname $DIR` @@ -163,7 +163,7 @@ ./python -E -tt ./Lib/test/regrtest.py -uall -x test_curses test_linuxaudiodev test_ossaudiodev >& build/$F NUM_FAILURES=`grep -ic fail build/$F` update_status "Testing all except curses and sound ($NUM_FAILURES failures)" "$F" $start - mail_on_failure "all" buiild/$F + mail_on_failure "all" build/$F fi fi fi From python-checkins at python.org Tue Jan 3 01:33:51 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 3 Jan 2006 01:33:51 +0100 (CET) Subject: [Python-checkins] commit of r41885 - python/trunk/Misc/build.sh Message-ID: <20060103003351.5FC081E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 3 01:33:50 2006 New Revision: 41885 Modified: python/trunk/Misc/build.sh Log: install svn in /usr/local/bin and remove from ~/local/bin Modified: python/trunk/Misc/build.sh ============================================================================== --- python/trunk/Misc/build.sh (original) +++ python/trunk/Misc/build.sh Tue Jan 3 01:33:50 2006 @@ -30,9 +30,6 @@ ## basename, date, dirname, expr, grep, readlink, uname ## cksum, make, mutt, rsync, svn -## need to get svn from ~/local/bin -PATH=$PATH:$HOME/local/bin - ## remember where did we started from DIR=`dirname $0` if [ "$DIR" = "" ]; then @@ -145,6 +142,7 @@ make test >& build/$F NUM_FAILURES=`grep -ic fail 
build/$F` update_status "Testing basics ($NUM_FAILURES failures)" "$F" $start + ## FIXME: should mail since -uall below should find same problems mail_on_failure "basics" buiild/$F ## run the tests looking for leaks From python-checkins at python.org Tue Jan 3 02:38:54 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 3 Jan 2006 02:38:54 +0100 (CET) Subject: [Python-checkins] commit of r41886 - python/trunk/Makefile.pre.in Message-ID: <20060103013854.5B6811E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 3 02:38:53 2006 New Revision: 41886 Modified: python/trunk/Makefile.pre.in Log: As discussed on python-dev, don't remove Python-ast.[ch] with distclean even though they are generated. Since these files require Python 2.2+, it's possible they can't be created on a fresh system. Modified: python/trunk/Makefile.pre.in ============================================================================== --- python/trunk/Makefile.pre.in (original) +++ python/trunk/Makefile.pre.in Tue Jan 3 02:38:53 2006 @@ -988,10 +988,10 @@ # Make things extra clean, before making a distribution: # remove all generated files, even Makefile[.pre] +# Keep configure and Python-ast.[ch], it's possible they can't be generated distclean: clobber -rm -f core Makefile Makefile.pre buildno config.status \ Modules/Setup Modules/Setup.local Modules/Setup.config - -rm -f $(AST_H) $(AST_C) find $(srcdir) '(' -name '*.fdc' -o -name '*~' \ -o -name '[@,#]*' -o -name '*.old' \ -o -name '*.orig' -o -name '*.rej' \ From t-meyer at ihug.co.nz Tue Jan 3 02:42:40 2006 From: t-meyer at ihug.co.nz (Tony Meyer) Date: Tue, 3 Jan 2006 14:42:40 +1300 Subject: [Python-checkins] commit of r41885 - python/trunk/Misc/build.sh In-Reply-To: <20060103003351.5FC081E4002@bag.python.org> References: <20060103003351.5FC081E4002@bag.python.org> Message-ID: <7FC24B29-178C-4417-9EC5-EA02FB303824@ihug.co.nz> [...] 
> update_status "Testing basics ($NUM_FAILURES > failures)" "$F" $start > + ## FIXME: should mail since -uall below should find > same problems > mail_on_failure "basics" buiild/$F I expect the buiild is meant to be build, as with the one you previously fixed :) =Tony.Meyer From python-checkins at python.org Tue Jan 3 03:12:24 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 3 Jan 2006 03:12:24 +0100 (CET) Subject: [Python-checkins] commit of r41887 - python/trunk/Doc/lib/libcookielib.tex Message-ID: <20060103021224.170931E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 3 03:12:20 2006 New Revision: 41887 Modified: python/trunk/Doc/lib/libcookielib.tex Log: Updated doc from patch 1117398 Modified: python/trunk/Doc/lib/libcookielib.tex ============================================================================== --- python/trunk/Doc/lib/libcookielib.tex (original) +++ python/trunk/Doc/lib/libcookielib.tex Tue Jan 3 03:12:20 2006 @@ -36,7 +36,9 @@ \begin{excdesc}{LoadError} Instances of \class{FileCookieJar} raise this exception on failure to -load cookies from a file. +load cookies from a file. \note{For backwards-compatibility +with Python 2.4 (which raised an \exception{IOError}), +\exception{LoadError} is a subclass of \exception{IOError}}. \end{excdesc} @@ -273,16 +275,19 @@ Arguments are as for \method{save()}. The named file must be in the format understood by the class, or -\exception{LoadError} will be raised. +\exception{LoadError} will be raised. Also, \exception{IOError} may +be raised, for example if the file does not exist. \note{For +backwards-compatibility with Python 2.4 (which raised +an \exception{IOError}), \exception{LoadError} is a subclass +of \exception{IOError}.} \end{methoddesc} \begin{methoddesc}[FileCookieJar]{revert}{filename=\constant{None}, ignore_discard=\constant{False}, ignore_expires=\constant{False}} Clear all cookies and reload cookies from a saved file. 
-Raises \exception{cookielib.LoadError} or \exception{IOError} if -reversion is not successful; the object's state will not be altered if -this happens. +\method{revert()} can raise the same exceptions as \method{load()}. +If there is a failure, the object's state will not be altered. \end{methoddesc} \class{FileCookieJar} instances have the following public attributes: From python-checkins at python.org Tue Jan 3 03:18:01 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 3 Jan 2006 03:18:01 +0100 (CET) Subject: [Python-checkins] commit of r41888 - python/trunk/Misc/build.sh Message-ID: <20060103021801.010BC1E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 3 03:18:01 2006 New Revision: 41888 Modified: python/trunk/Misc/build.sh Log: Dammit, who the hell spells build with ii? Thanks to Tony Meyer for spotting my boo-boo. Modified: python/trunk/Misc/build.sh ============================================================================== --- python/trunk/Misc/build.sh (original) +++ python/trunk/Misc/build.sh Tue Jan 3 03:18:01 2006 @@ -143,7 +143,7 @@ NUM_FAILURES=`grep -ic fail build/$F` update_status "Testing basics ($NUM_FAILURES failures)" "$F" $start ## FIXME: should mail since -uall below should find same problems - mail_on_failure "basics" buiild/$F + mail_on_failure "basics" build/$F ## run the tests looking for leaks F=make-test-refleak.out From nnorwitz at gmail.com Tue Jan 3 03:19:04 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Mon, 2 Jan 2006 18:19:04 -0800 Subject: [Python-checkins] commit of r41885 - python/trunk/Misc/build.sh In-Reply-To: <7FC24B29-178C-4417-9EC5-EA02FB303824@ihug.co.nz> References: <20060103003351.5FC081E4002@bag.python.org> <7FC24B29-178C-4417-9EC5-EA02FB303824@ihug.co.nz> Message-ID: On 1/2/06, Tony Meyer wrote: > > I expect the buiild is meant to be build, as with the one you > previously fixed :) Whoops! :-) Fixed the second one too. I sure hope there isn't a third one lurking. Thanks! 
n From python-checkins at python.org Tue Jan 3 07:03:05 2006 From: python-checkins at python.org (barry.warsaw) Date: Tue, 3 Jan 2006 07:03:05 +0100 (CET) Subject: [Python-checkins] commit of r41889 - python/trunk Message-ID: <20060103060305.5E34D1E4002@bag.python.org> Author: barry.warsaw Date: Tue Jan 3 07:03:04 2006 New Revision: 41889 Modified: python/trunk/ (props changed) Log: On the Mac, we have to ignore python.exe too. From python-checkins at python.org Tue Jan 3 07:29:55 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 07:29:55 +0100 (CET) Subject: [Python-checkins] commit of r41890 - python/trunk/Tools/msi/msi.py Message-ID: <20060103062955.BBB451E4002@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 07:29:53 2006 New Revision: 41890 Modified: python/trunk/Tools/msi/msi.py Log: Ignore .svn instead of CVS now. Modified: python/trunk/Tools/msi/msi.py ============================================================================== --- python/trunk/Tools/msi/msi.py (original) +++ python/trunk/Tools/msi/msi.py Tue Jan 3 07:29:53 2006 @@ -907,7 +907,7 @@ pydirs = [(root,"Lib")] while pydirs: parent, dir = pydirs.pop() - if dir == "CVS" or dir.startswith("plat-"): + if dir == ".svn" or dir.startswith("plat-"): continue elif dir in ["lib-tk", "idlelib", "Icons"]: if not have_tcl: @@ -957,9 +957,9 @@ lib.add_file("wininst-6.exe") lib.add_file("wininst-7.1.exe") if dir=="data" and parent.physical=="test" and parent.basedir.physical=="email": - # This should contain all non-CVS files listed in CVS + # This should contain all non-.svn files listed in subversion for f in os.listdir(lib.absolute): - if f.endswith(".txt") or f=="CVS":continue + if f.endswith(".txt") or f==".svn":continue if f.endswith(".au") or f.endswith(".gif"): lib.add_file(f) else: From python-checkins at python.org Tue Jan 3 07:30:40 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 07:30:40 +0100 (CET) Subject: [Python-checkins] 
commit of r41891 - python/branches/release24-maint/Tools/msi/msi.py Message-ID: <20060103063040.B32911E400B@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 07:30:40 2006 New Revision: 41891 Modified: python/branches/release24-maint/Tools/msi/msi.py Log: Ignore .svn instead of CVS now. Modified: python/branches/release24-maint/Tools/msi/msi.py ============================================================================== --- python/branches/release24-maint/Tools/msi/msi.py (original) +++ python/branches/release24-maint/Tools/msi/msi.py Tue Jan 3 07:30:40 2006 @@ -906,7 +906,7 @@ pydirs = [(root,"Lib")] while pydirs: parent, dir = pydirs.pop() - if dir == "CVS" or dir.startswith("plat-"): + if dir == ".svn" or dir.startswith("plat-"): continue elif dir in ["lib-tk", "idlelib", "Icons"]: if not have_tcl: @@ -956,9 +956,9 @@ lib.add_file("wininst-6.exe") lib.add_file("wininst-7.1.exe") if dir=="data" and parent.physical=="test" and parent.basedir.physical=="email": - # This should contain all non-CVS files listed in CVS + # This should contain all non-.svn files listed in subversion for f in os.listdir(lib.absolute): - if f.endswith(".txt") or f=="CVS":continue + if f.endswith(".txt") or f==".svn":continue if f.endswith(".au") or f.endswith(".gif"): lib.add_file(f) else: From python-checkins at python.org Tue Jan 3 07:43:13 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 07:43:13 +0100 (CET) Subject: [Python-checkins] commit of r41892 - in external/zlib: ChangeLog FAQ INDEX Makefile Makefile.in README adler32.c algorithm.txt amiga amiga/Makefile.pup amiga/Makefile.sas as400 as400/bndsrc as400/compile.clp as400/readme.txt as400/zlib.inc compress.c configure contrib contrib/README.contrib contrib/ada contrib/ada/buffer_demo.adb contrib/ada/mtest.adb contrib/ada/read.adb contrib/ada/readme.txt contrib/ada/test.adb contrib/ada/zlib-streams.adb contrib/ada/zlib-streams.ads contrib/ada/zlib-thin.adb contrib/ada/zlib-thin.ads 
contrib/ada/zlib.adb contrib/ada/zlib.ads contrib/ada/zlib.gpr contrib/asm586 contrib/asm586/README.586 contrib/asm586/match.S contrib/asm686 contrib/asm686/README.686 contrib/asm686/match.S contrib/blast contrib/blast/Makefile contrib/blast/README contrib/blast/blast.c contrib/blast/blast.h contrib/blast/test.pk contrib/blast/test.txt contrib/delphi contrib/delphi/ZLib.pas contrib/delphi/ZLibConst.pas contrib/delphi/readme.txt contrib/delphi/zlibd32.mak contrib/dotzlib contrib/dotzlib/DotZLib contrib/dotzlib/DotZLib.build contrib/dotzlib/DotZLib.chm contrib/dotzlib/DotZLib.sln contrib/dotzlib/DotZLib/AssemblyInfo.cs contrib/dotzlib/DotZLib/ChecksumImpl.cs contrib/dotzlib/DotZLib/CircularBuffer.cs contrib/dotzlib/DotZLib/CodecBase.cs contrib/dotzlib/DotZLib/Deflater.cs contrib/dotzlib/DotZLib/DotZLib.cs contrib/dotzlib/DotZLib/DotZLib.csproj contrib/dotzlib/DotZLib/GZipStream.cs contrib/dotzlib/DotZLib/Inflater.cs contrib/dotzlib/DotZLib/UnitTests.cs contrib/dotzlib/LICENSE_1_0.txt contrib/dotzlib/readme.txt contrib/infback9 contrib/infback9/README contrib/infback9/infback9.c contrib/infback9/infback9.h contrib/infback9/inffix9.h contrib/infback9/inflate9.h contrib/infback9/inftree9.c contrib/infback9/inftree9.h contrib/inflate86 contrib/inflate86/inffas86.c contrib/inflate86/inffast.S contrib/iostream contrib/iostream/test.cpp contrib/iostream/zfstream.cpp contrib/iostream/zfstream.h contrib/iostream2 contrib/iostream2/zstream.h contrib/iostream2/zstream_test.cpp contrib/iostream3 contrib/iostream3/README contrib/iostream3/TODO contrib/iostream3/test.cc contrib/iostream3/zfstream.cc contrib/iostream3/zfstream.h contrib/masm686 contrib/masm686/match.asm contrib/masmx64 contrib/masmx64/bld_ml64.bat contrib/masmx64/gvmat64.asm contrib/masmx64/gvmat64.obj contrib/masmx64/inffas8664.c contrib/masmx64/inffasx64.asm contrib/masmx64/inffasx64.obj contrib/masmx64/readme.txt contrib/masmx86 contrib/masmx86/bld_ml32.bat contrib/masmx86/gvmat32.asm contrib/masmx86/gvmat32.obj 
contrib/masmx86/gvmat32c.c contrib/masmx86/inffas32.asm contrib/masmx86/inffas32.obj contrib/masmx86/mkasm.bat contrib/masmx86/readme.txt contrib/minizip contrib/minizip/ChangeLogUnzip contrib/minizip/Makefile contrib/minizip/crypt.h contrib/minizip/ioapi.c contrib/minizip/ioapi.h contrib/minizip/iowin32.c contrib/minizip/iowin32.h contrib/minizip/miniunz.c contrib/minizip/minizip.c contrib/minizip/mztools.c contrib/minizip/mztools.h contrib/minizip/unzip.c contrib/minizip/unzip.h contrib/minizip/zip.c contrib/minizip/zip.h contrib/pascal contrib/pascal/example.pas contrib/pascal/readme.txt contrib/pascal/zlibd32.mak contrib/pascal/zlibpas.pas contrib/puff contrib/puff/Makefile contrib/puff/README contrib/puff/puff.c contrib/puff/puff.h contrib/puff/zeros.raw contrib/testzlib contrib/testzlib/testzlib.c contrib/testzlib/testzlib.txt contrib/untgz contrib/untgz/Makefile contrib/untgz/Makefile.msc contrib/untgz/untgz.c contrib/vstudio contrib/vstudio/readme.txt contrib/vstudio/vc7 contrib/vstudio/vc7/miniunz.vcproj contrib/vstudio/vc7/minizip.vcproj contrib/vstudio/vc7/testzlib.vcproj contrib/vstudio/vc7/zlib.rc contrib/vstudio/vc7/zlibstat.vcproj contrib/vstudio/vc7/zlibvc.def contrib/vstudio/vc7/zlibvc.sln contrib/vstudio/vc7/zlibvc.vcproj contrib/vstudio/vc8 contrib/vstudio/vc8/miniunz.vcproj contrib/vstudio/vc8/minizip.vcproj contrib/vstudio/vc8/testzlib.vcproj contrib/vstudio/vc8/testzlibdll.vcproj contrib/vstudio/vc8/zlib.rc contrib/vstudio/vc8/zlibstat.vcproj contrib/vstudio/vc8/zlibvc.def contrib/vstudio/vc8/zlibvc.sln contrib/vstudio/vc8/zlibvc.vcproj crc32.c crc32.h deflate.c deflate.h example.c examples examples/README.examples examples/fitblk.c examples/gun.c examples/gzappend.c examples/gzjoin.c examples/gzlog.c examples/gzlog.h examples/zlib_how.html examples/zpipe.c examples/zran.c gzio.c infback.c inffast.c inffast.h inffixed.h inflate.c inflate.h inftrees.c inftrees.h make_vms.com minigzip.c msdos msdos/Makefile.bor msdos/Makefile.dj2 
msdos/Makefile.emx msdos/Makefile.msc msdos/Makefile.tc old old/Makefile.riscos old/README old/descrip.mms old/os2 old/os2/Makefile.os2 old/os2/zlib.def old/visual-basic.txt old/zlib.html projects projects/README.projects projects/visualc6 projects/visualc6/README.txt projects/visualc6/example.dsp projects/visualc6/minigzip.dsp projects/visualc6/zlib.dsp projects/visualc6/zlib.dsw qnx qnx/package.qpg trees.c trees.h uncompr.c win32 win32/DLL_FAQ.txt win32/Makefile.bor win32/Makefile.emx win32/Makefile.gcc win32/Makefile.msc win32/VisualC.txt win32/zlib.def win32/zlib1.rc zconf.h zconf.in.h zlib.3 zlib.h zutil.c zutil.h Message-ID: <20060103064313.DA3A21E4002@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 07:42:59 2006 New Revision: 41892 Added: external/zlib/ external/zlib/ChangeLog external/zlib/FAQ external/zlib/INDEX external/zlib/Makefile external/zlib/Makefile.in external/zlib/README external/zlib/adler32.c external/zlib/algorithm.txt external/zlib/amiga/ external/zlib/amiga/Makefile.pup external/zlib/amiga/Makefile.sas external/zlib/as400/ external/zlib/as400/bndsrc external/zlib/as400/compile.clp external/zlib/as400/readme.txt external/zlib/as400/zlib.inc external/zlib/compress.c external/zlib/configure (contents, props changed) external/zlib/contrib/ external/zlib/contrib/README.contrib external/zlib/contrib/ada/ external/zlib/contrib/ada/buffer_demo.adb external/zlib/contrib/ada/mtest.adb external/zlib/contrib/ada/read.adb external/zlib/contrib/ada/readme.txt external/zlib/contrib/ada/test.adb external/zlib/contrib/ada/zlib-streams.adb external/zlib/contrib/ada/zlib-streams.ads external/zlib/contrib/ada/zlib-thin.adb external/zlib/contrib/ada/zlib-thin.ads external/zlib/contrib/ada/zlib.adb external/zlib/contrib/ada/zlib.ads external/zlib/contrib/ada/zlib.gpr external/zlib/contrib/asm586/ external/zlib/contrib/asm586/README.586 external/zlib/contrib/asm586/match.S external/zlib/contrib/asm686/ external/zlib/contrib/asm686/README.686 
external/zlib/contrib/asm686/match.S external/zlib/contrib/blast/ external/zlib/contrib/blast/Makefile external/zlib/contrib/blast/README external/zlib/contrib/blast/blast.c external/zlib/contrib/blast/blast.h external/zlib/contrib/blast/test.pk (contents, props changed) external/zlib/contrib/blast/test.txt external/zlib/contrib/delphi/ external/zlib/contrib/delphi/ZLib.pas external/zlib/contrib/delphi/ZLibConst.pas external/zlib/contrib/delphi/readme.txt external/zlib/contrib/delphi/zlibd32.mak external/zlib/contrib/dotzlib/ external/zlib/contrib/dotzlib/DotZLib/ external/zlib/contrib/dotzlib/DotZLib.build external/zlib/contrib/dotzlib/DotZLib.chm (contents, props changed) external/zlib/contrib/dotzlib/DotZLib.sln external/zlib/contrib/dotzlib/DotZLib/AssemblyInfo.cs external/zlib/contrib/dotzlib/DotZLib/ChecksumImpl.cs external/zlib/contrib/dotzlib/DotZLib/CircularBuffer.cs external/zlib/contrib/dotzlib/DotZLib/CodecBase.cs external/zlib/contrib/dotzlib/DotZLib/Deflater.cs external/zlib/contrib/dotzlib/DotZLib/DotZLib.cs external/zlib/contrib/dotzlib/DotZLib/DotZLib.csproj external/zlib/contrib/dotzlib/DotZLib/GZipStream.cs external/zlib/contrib/dotzlib/DotZLib/Inflater.cs external/zlib/contrib/dotzlib/DotZLib/UnitTests.cs external/zlib/contrib/dotzlib/LICENSE_1_0.txt external/zlib/contrib/dotzlib/readme.txt external/zlib/contrib/infback9/ external/zlib/contrib/infback9/README external/zlib/contrib/infback9/infback9.c external/zlib/contrib/infback9/infback9.h external/zlib/contrib/infback9/inffix9.h external/zlib/contrib/infback9/inflate9.h external/zlib/contrib/infback9/inftree9.c external/zlib/contrib/infback9/inftree9.h external/zlib/contrib/inflate86/ external/zlib/contrib/inflate86/inffas86.c external/zlib/contrib/inflate86/inffast.S external/zlib/contrib/iostream/ external/zlib/contrib/iostream/test.cpp external/zlib/contrib/iostream/zfstream.cpp external/zlib/contrib/iostream/zfstream.h external/zlib/contrib/iostream2/ 
external/zlib/contrib/iostream2/zstream.h external/zlib/contrib/iostream2/zstream_test.cpp external/zlib/contrib/iostream3/ external/zlib/contrib/iostream3/README external/zlib/contrib/iostream3/TODO external/zlib/contrib/iostream3/test.cc external/zlib/contrib/iostream3/zfstream.cc external/zlib/contrib/iostream3/zfstream.h external/zlib/contrib/masm686/ external/zlib/contrib/masm686/match.asm external/zlib/contrib/masmx64/ external/zlib/contrib/masmx64/bld_ml64.bat external/zlib/contrib/masmx64/gvmat64.asm external/zlib/contrib/masmx64/gvmat64.obj (contents, props changed) external/zlib/contrib/masmx64/inffas8664.c external/zlib/contrib/masmx64/inffasx64.asm external/zlib/contrib/masmx64/inffasx64.obj (contents, props changed) external/zlib/contrib/masmx64/readme.txt external/zlib/contrib/masmx86/ external/zlib/contrib/masmx86/bld_ml32.bat external/zlib/contrib/masmx86/gvmat32.asm external/zlib/contrib/masmx86/gvmat32.obj (contents, props changed) external/zlib/contrib/masmx86/gvmat32c.c external/zlib/contrib/masmx86/inffas32.asm external/zlib/contrib/masmx86/inffas32.obj (contents, props changed) external/zlib/contrib/masmx86/mkasm.bat (contents, props changed) external/zlib/contrib/masmx86/readme.txt external/zlib/contrib/minizip/ external/zlib/contrib/minizip/ChangeLogUnzip external/zlib/contrib/minizip/Makefile external/zlib/contrib/minizip/crypt.h external/zlib/contrib/minizip/ioapi.c external/zlib/contrib/minizip/ioapi.h external/zlib/contrib/minizip/iowin32.c external/zlib/contrib/minizip/iowin32.h external/zlib/contrib/minizip/miniunz.c external/zlib/contrib/minizip/minizip.c external/zlib/contrib/minizip/mztools.c external/zlib/contrib/minizip/mztools.h external/zlib/contrib/minizip/unzip.c external/zlib/contrib/minizip/unzip.h external/zlib/contrib/minizip/zip.c external/zlib/contrib/minizip/zip.h external/zlib/contrib/pascal/ external/zlib/contrib/pascal/example.pas external/zlib/contrib/pascal/readme.txt external/zlib/contrib/pascal/zlibd32.mak 
external/zlib/contrib/pascal/zlibpas.pas external/zlib/contrib/puff/ external/zlib/contrib/puff/Makefile external/zlib/contrib/puff/README external/zlib/contrib/puff/puff.c external/zlib/contrib/puff/puff.h external/zlib/contrib/puff/zeros.raw (contents, props changed) external/zlib/contrib/testzlib/ external/zlib/contrib/testzlib/testzlib.c external/zlib/contrib/testzlib/testzlib.txt external/zlib/contrib/untgz/ external/zlib/contrib/untgz/Makefile external/zlib/contrib/untgz/Makefile.msc external/zlib/contrib/untgz/untgz.c external/zlib/contrib/vstudio/ external/zlib/contrib/vstudio/readme.txt external/zlib/contrib/vstudio/vc7/ external/zlib/contrib/vstudio/vc7/miniunz.vcproj external/zlib/contrib/vstudio/vc7/minizip.vcproj external/zlib/contrib/vstudio/vc7/testzlib.vcproj external/zlib/contrib/vstudio/vc7/zlib.rc external/zlib/contrib/vstudio/vc7/zlibstat.vcproj external/zlib/contrib/vstudio/vc7/zlibvc.def external/zlib/contrib/vstudio/vc7/zlibvc.sln external/zlib/contrib/vstudio/vc7/zlibvc.vcproj external/zlib/contrib/vstudio/vc8/ external/zlib/contrib/vstudio/vc8/miniunz.vcproj external/zlib/contrib/vstudio/vc8/minizip.vcproj external/zlib/contrib/vstudio/vc8/testzlib.vcproj external/zlib/contrib/vstudio/vc8/testzlibdll.vcproj external/zlib/contrib/vstudio/vc8/zlib.rc external/zlib/contrib/vstudio/vc8/zlibstat.vcproj external/zlib/contrib/vstudio/vc8/zlibvc.def external/zlib/contrib/vstudio/vc8/zlibvc.sln external/zlib/contrib/vstudio/vc8/zlibvc.vcproj external/zlib/crc32.c external/zlib/crc32.h external/zlib/deflate.c external/zlib/deflate.h external/zlib/example.c external/zlib/examples/ external/zlib/examples/README.examples external/zlib/examples/fitblk.c external/zlib/examples/gun.c external/zlib/examples/gzappend.c external/zlib/examples/gzjoin.c external/zlib/examples/gzlog.c external/zlib/examples/gzlog.h external/zlib/examples/zlib_how.html external/zlib/examples/zpipe.c external/zlib/examples/zran.c external/zlib/gzio.c external/zlib/infback.c 
external/zlib/inffast.c external/zlib/inffast.h external/zlib/inffixed.h external/zlib/inflate.c external/zlib/inflate.h external/zlib/inftrees.c external/zlib/inftrees.h external/zlib/make_vms.com external/zlib/minigzip.c external/zlib/msdos/ external/zlib/msdos/Makefile.bor external/zlib/msdos/Makefile.dj2 external/zlib/msdos/Makefile.emx external/zlib/msdos/Makefile.msc external/zlib/msdos/Makefile.tc external/zlib/old/ external/zlib/old/Makefile.riscos external/zlib/old/README external/zlib/old/descrip.mms external/zlib/old/os2/ external/zlib/old/os2/Makefile.os2 external/zlib/old/os2/zlib.def external/zlib/old/visual-basic.txt external/zlib/old/zlib.html external/zlib/projects/ external/zlib/projects/README.projects external/zlib/projects/visualc6/ external/zlib/projects/visualc6/README.txt external/zlib/projects/visualc6/example.dsp external/zlib/projects/visualc6/minigzip.dsp external/zlib/projects/visualc6/zlib.dsp external/zlib/projects/visualc6/zlib.dsw external/zlib/qnx/ external/zlib/qnx/package.qpg external/zlib/trees.c external/zlib/trees.h external/zlib/uncompr.c external/zlib/win32/ external/zlib/win32/DLL_FAQ.txt external/zlib/win32/Makefile.bor external/zlib/win32/Makefile.emx external/zlib/win32/Makefile.gcc external/zlib/win32/Makefile.msc external/zlib/win32/VisualC.txt external/zlib/win32/zlib.def external/zlib/win32/zlib1.rc external/zlib/zconf.h external/zlib/zconf.in.h external/zlib/zlib.3 external/zlib/zlib.h external/zlib/zutil.c external/zlib/zutil.h Log: Import zlib 1.2.3 Added: external/zlib/ChangeLog ============================================================================== --- (empty file) +++ external/zlib/ChangeLog Tue Jan 3 07:42:59 2006 @@ -0,0 +1,855 @@ + + ChangeLog file for zlib + +Changes in 1.2.3 (18 July 2005) +- Apply security vulnerability fixes to contrib/infback9 as well +- Clean up some text files (carriage returns, trailing space) +- Update testzlib, vstudio, masmx64, and masmx86 in contrib [Vollant] + +Changes in 
1.2.2.4 (11 July 2005) +- Add inflatePrime() function for starting inflation at bit boundary +- Avoid some Visual C warnings in deflate.c +- Avoid more silly Visual C warnings in inflate.c and inftrees.c for 64-bit + compile +- Fix some spelling errors in comments [Betts] +- Correct inflateInit2() error return documentation in zlib.h +- Added zran.c example of compressed data random access to examples + directory, shows use of inflatePrime() +- Fix cast for assignments to strm->state in inflate.c and infback.c +- Fix zlibCompileFlags() in zutil.c to use 1L for long shifts [Oberhumer] +- Move declarations of gf2 functions to right place in crc32.c [Oberhumer] +- Add cast in trees.c to avoid a warning [Oberhumer] +- Avoid some warnings in fitblk.c, gun.c, gzjoin.c in examples [Oberhumer] +- Update make_vms.com [Zinser] +- Initialize state->write in inflateReset() since copied in inflate_fast() +- Be more strict on incomplete code sets in inflate_table() and increase + ENOUGH and MAXD -- this repairs a possible security vulnerability for + invalid inflate input. Thanks to Tavis Ormandy and Markus Oberhumer for + discovering the vulnerability and providing test cases. 
+- Add ia64 support to configure for HP-UX [Smith] +- Add error return to gzread() for format or i/o error [Levin] +- Use malloc.h for OS/2 [Necasek] + +Changes in 1.2.2.3 (27 May 2005) +- Replace 1U constants in inflate.c and inftrees.c for 64-bit compile +- Typecast fread() return values in gzio.c [Vollant] +- Remove trailing space in minigzip.c outmode (VC++ can't deal with it) +- Fix crc check bug in gzread() after gzungetc() [Heiner] +- Add the deflateTune() function to adjust internal compression parameters +- Add a fast gzip decompressor, gun.c, to examples (use of inflateBack) +- Remove an incorrect assertion in examples/zpipe.c +- Add C++ wrapper in infback9.h [Donais] +- Fix bug in inflateCopy() when decoding fixed codes +- Note in zlib.h how much deflateSetDictionary() actually uses +- Remove USE_DICT_HEAD in deflate.c (would mess up inflate if used) +- Add _WIN32_WCE to define WIN32 in zconf.in.h [Spencer] +- Don't include stderr.h or errno.h for _WIN32_WCE in zutil.h [Spencer] +- Add gzdirect() function to indicate transparent reads +- Update contrib/minizip [Vollant] +- Fix compilation of deflate.c when both ASMV and FASTEST [Oberhumer] +- Add casts in crc32.c to avoid warnings [Oberhumer] +- Add contrib/masmx64 [Vollant] +- Update contrib/asm586, asm686, masmx86, testzlib, vstudio [Vollant] + +Changes in 1.2.2.2 (30 December 2004) +- Replace structure assignments in deflate.c and inflate.c with zmemcpy to + avoid implicit memcpy calls (portability for no-library compilation) +- Increase sprintf() buffer size in gzdopen() to allow for large numbers +- Add INFLATE_STRICT to check distances against zlib header +- Improve WinCE errno handling and comments [Chang] +- Remove comment about no gzip header processing in FAQ +- Add Z_FIXED strategy option to deflateInit2() to force fixed trees +- Add updated make_vms.com [Coghlan], update README +- Create a new "examples" directory, move gzappend.c there, add zpipe.c, + fitblk.c, gzlog.[ch], gzjoin.c, and 
zlib_how.html. +- Add FAQ entry and comments in deflate.c on uninitialized memory access +- Add Solaris 9 make options in configure [Gilbert] +- Allow strerror() usage in gzio.c for STDC +- Fix DecompressBuf in contrib/delphi/ZLib.pas [ManChesTer] +- Update contrib/masmx86/inffas32.asm and gvmat32.asm [Vollant] +- Use z_off_t for adler32_combine() and crc32_combine() lengths +- Make adler32() much faster for small len +- Use OS_CODE in deflate() default gzip header + +Changes in 1.2.2.1 (31 October 2004) +- Allow inflateSetDictionary() call for raw inflate +- Fix inflate header crc check bug for file names and comments +- Add deflateSetHeader() and gz_header structure for custom gzip headers +- Add inflateGetheader() to retrieve gzip headers +- Add crc32_combine() and adler32_combine() functions +- Add alloc_func, free_func, in_func, out_func to Z_PREFIX list +- Use zstreamp consistently in zlib.h (inflate_back functions) +- Remove GUNZIP condition from definition of inflate_mode in inflate.h + and in contrib/inflate86/inffast.S [Truta, Anderson] +- Add support for AMD64 in contrib/inflate86/inffas86.c [Anderson] +- Update projects/README.projects and projects/visualc6 [Truta] +- Update win32/DLL_FAQ.txt [Truta] +- Avoid warning under NO_GZCOMPRESS in gzio.c; fix typo [Truta] +- Deprecate Z_ASCII; use Z_TEXT instead [Truta] +- Use a new algorithm for setting strm->data_type in trees.c [Truta] +- Do not define an exit() prototype in zutil.c unless DEBUG defined +- Remove prototype of exit() from zutil.c, example.c, minigzip.c [Truta] +- Add comment in zlib.h for Z_NO_FLUSH parameter to deflate() +- Fix Darwin build version identification [Peterson] + +Changes in 1.2.2 (3 October 2004) +- Update zlib.h comments on gzip in-memory processing +- Set adler to 1 in inflateReset() to support Java test suite [Walles] +- Add contrib/dotzlib [Ravn] +- Update win32/DLL_FAQ.txt [Truta] +- Update contrib/minizip [Vollant] +- Move contrib/visual-basic.txt to old/ [Truta] +- Fix 
assembler builds in projects/visualc6/ [Truta] + +Changes in 1.2.1.2 (9 September 2004) +- Update INDEX file +- Fix trees.c to update strm->data_type (no one ever noticed!) +- Fix bug in error case in inflate.c, infback.c, and infback9.c [Brown] +- Add "volatile" to crc table flag declaration (for DYNAMIC_CRC_TABLE) +- Add limited multitasking protection to DYNAMIC_CRC_TABLE +- Add NO_vsnprintf for VMS in zutil.h [Mozilla] +- Don't declare strerror() under VMS [Mozilla] +- Add comment to DYNAMIC_CRC_TABLE to use get_crc_table() to initialize +- Update contrib/ada [Anisimkov] +- Update contrib/minizip [Vollant] +- Fix configure to not hardcode directories for Darwin [Peterson] +- Fix gzio.c to not return error on empty files [Brown] +- Fix indentation; update version in contrib/delphi/ZLib.pas and + contrib/pascal/zlibpas.pas [Truta] +- Update mkasm.bat in contrib/masmx86 [Truta] +- Update contrib/untgz [Truta] +- Add projects/README.projects [Truta] +- Add project for MS Visual C++ 6.0 in projects/visualc6 [Cadieux, Truta] +- Update win32/DLL_FAQ.txt [Truta] +- Update list of Z_PREFIX symbols in zconf.h [Randers-Pehrson, Truta] +- Remove an unnecessary assignment to curr in inftrees.c [Truta] +- Add OS/2 to exe builds in configure [Poltorak] +- Remove err dummy parameter in zlib.h [Kientzle] + +Changes in 1.2.1.1 (9 January 2004) +- Update email address in README +- Several FAQ updates +- Fix a big fat bug in inftrees.c that prevented decoding valid + dynamic blocks with only literals and no distance codes -- + Thanks to "Hot Emu" for the bug report and sample file +- Add a note to puff.c on no distance codes case. 
+ +Changes in 1.2.1 (17 November 2003) +- Remove a tab in contrib/gzappend/gzappend.c +- Update some interfaces in contrib for new zlib functions +- Update zlib version number in some contrib entries +- Add Windows CE definition for ptrdiff_t in zutil.h [Mai, Truta] +- Support shared libraries on Hurd and KFreeBSD [Brown] +- Fix error in NO_DIVIDE option of adler32.c + +Changes in 1.2.0.8 (4 November 2003) +- Update version in contrib/delphi/ZLib.pas and contrib/pascal/zlibpas.pas +- Add experimental NO_DIVIDE #define in adler32.c + - Possibly faster on some processors (let me know if it is) +- Correct Z_BLOCK to not return on first inflate call if no wrap +- Fix strm->data_type on inflate() return to correctly indicate EOB +- Add deflatePrime() function for appending in the middle of a byte +- Add contrib/gzappend for an example of appending to a stream +- Update win32/DLL_FAQ.txt [Truta] +- Delete Turbo C comment in README [Truta] +- Improve some indentation in zconf.h [Truta] +- Fix infinite loop on bad input in configure script [Church] +- Fix gzeof() for concatenated gzip files [Johnson] +- Add example to contrib/visual-basic.txt [Michael B.] 
+- Add -p to mkdir's in Makefile.in [vda] +- Fix configure to properly detect presence or lack of printf functions +- Add AS400 support [Monnerat] +- Add a little Cygwin support [Wilson] + +Changes in 1.2.0.7 (21 September 2003) +- Correct some debug formats in contrib/infback9 +- Cast a type in a debug statement in trees.c +- Change search and replace delimiter in configure from % to # [Beebe] +- Update contrib/untgz to 0.2 with various fixes [Truta] +- Add build support for Amiga [Nikl] +- Remove some directories in old that have been updated to 1.2 +- Add dylib building for Mac OS X in configure and Makefile.in +- Remove old distribution stuff from Makefile +- Update README to point to DLL_FAQ.txt, and add comment on Mac OS X +- Update links in README + +Changes in 1.2.0.6 (13 September 2003) +- Minor FAQ updates +- Update contrib/minizip to 1.00 [Vollant] +- Remove test of gz functions in example.c when GZ_COMPRESS defined [Truta] +- Update POSTINC comment for 68060 [Nikl] +- Add contrib/infback9 with deflate64 decoding (unsupported) +- For MVS define NO_vsnprintf and undefine FAR [van Burik] +- Add pragma for fdopen on MVS [van Burik] + +Changes in 1.2.0.5 (8 September 2003) +- Add OF to inflateBackEnd() declaration in zlib.h +- Remember start when using gzdopen in the middle of a file +- Use internal off_t counters in gz* functions to properly handle seeks +- Perform more rigorous check for distance-too-far in inffast.c +- Add Z_BLOCK flush option to return from inflate at block boundary +- Set strm->data_type on return from inflate + - Indicate bits unused, if at block boundary, and if in last block +- Replace size_t with ptrdiff_t in crc32.c, and check for correct size +- Add condition so old NO_DEFLATE define still works for compatibility +- FAQ update regarding the Windows DLL [Truta] +- INDEX update: add qnx entry, remove aix entry [Truta] +- Install zlib.3 into mandir [Wilson] +- Move contrib/zlib_dll_FAQ.txt to win32/DLL_FAQ.txt; update [Truta] +- 
Adapt the zlib interface to the new DLL convention guidelines [Truta] +- Introduce ZLIB_WINAPI macro to allow the export of functions using + the WINAPI calling convention, for Visual Basic [Vollant, Truta] +- Update msdos and win32 scripts and makefiles [Truta] +- Export symbols by name, not by ordinal, in win32/zlib.def [Truta] +- Add contrib/ada [Anisimkov] +- Move asm files from contrib/vstudio/vc70_32 to contrib/asm386 [Truta] +- Rename contrib/asm386 to contrib/masmx86 [Truta, Vollant] +- Add contrib/masm686 [Truta] +- Fix offsets in contrib/inflate86 and contrib/masmx86/inffas32.asm + [Truta, Vollant] +- Update contrib/delphi; rename to contrib/pascal; add example [Truta] +- Remove contrib/delphi2; add a new contrib/delphi [Truta] +- Avoid inclusion of the nonstandard <strstream.h> in contrib/iostream, + and fix some method prototypes [Truta] +- Fix the ZCR_SEED2 constant to avoid warnings in contrib/minizip + [Truta] +- Avoid the use of backslash (\) in contrib/minizip [Vollant] +- Fix file time handling in contrib/untgz; update makefiles [Truta] +- Update contrib/vstudio/vc70_32 to comply with the new DLL guidelines + [Vollant] +- Remove contrib/vstudio/vc15_16 [Vollant] +- Rename contrib/vstudio/vc70_32 to contrib/vstudio/vc7 [Truta] +- Update README.contrib [Truta] +- Invert the assignment order of match_head and s->prev[...] 
in + INSERT_STRING [Truta] +- Compare TOO_FAR with 32767 instead of 32768, to avoid 16-bit warnings + [Truta] +- Compare function pointers with 0, not with NULL or Z_NULL [Truta] +- Fix prototype of syncsearch in inflate.c [Truta] +- Introduce ASMINF macro to be enabled when using an ASM implementation + of inflate_fast [Truta] +- Change NO_DEFLATE to NO_GZCOMPRESS [Truta] +- Modify test_gzio in example.c to take a single file name as a + parameter [Truta] +- Exit the example.c program if gzopen fails [Truta] +- Add type casts around strlen in example.c [Truta] +- Remove casting to sizeof in minigzip.c; give a proper type + to the variable compared with SUFFIX_LEN [Truta] +- Update definitions of STDC and STDC99 in zconf.h [Truta] +- Synchronize zconf.h with the new Windows DLL interface [Truta] +- Use SYS16BIT instead of __32BIT__ to distinguish between + 16- and 32-bit platforms [Truta] +- Use far memory allocators in small 16-bit memory models for + Turbo C [Truta] +- Add info about the use of ASMV, ASMINF and ZLIB_WINAPI in + zlibCompileFlags [Truta] +- Cygwin has vsnprintf [Wilson] +- In Windows16, OS_CODE is 0, as in MSDOS [Truta] +- In Cygwin, OS_CODE is 3 (Unix), not 11 (Windows32) [Wilson] + +Changes in 1.2.0.4 (10 August 2003) +- Minor FAQ updates +- Be more strict when checking inflateInit2's windowBits parameter +- Change NO_GUNZIP compile option to NO_GZIP to cover deflate as well +- Add gzip wrapper option to deflateInit2 using windowBits +- Add updated QNX rule in configure and qnx directory [Bonnefoy] +- Make inflate distance-too-far checks more rigorous +- Clean up FAR usage in inflate +- Add casting to sizeof() in gzio.c and minigzip.c + +Changes in 1.2.0.3 (19 July 2003) +- Fix silly error in gzungetc() implementation [Vollant] +- Update contrib/minizip and contrib/vstudio [Vollant] +- Fix printf format in example.c +- Correct cdecl support in zconf.in.h [Anisimkov] +- Minor FAQ updates + +Changes in 1.2.0.2 (13 July 2003) +- Add ZLIB_VERNUM in 
zlib.h for numerical preprocessor comparisons +- Attempt to avoid warnings in crc32.c for pointer-int conversion +- Add AIX to configure, remove aix directory [Bakker] +- Add some casts to minigzip.c +- Improve checking after insecure sprintf() or vsprintf() calls +- Remove #elif's from crc32.c +- Change leave label to inf_leave in inflate.c and infback.c to avoid + library conflicts +- Remove inflate gzip decoding by default--only enable gzip decoding by + special request for stricter backward compatibility +- Add zlibCompileFlags() function to return compilation information +- More typecasting in deflate.c to avoid warnings +- Remove leading underscore from _Capital #defines [Truta] +- Fix configure to link shared library when testing +- Add some Windows CE target adjustments [Mai] +- Remove #define ZLIB_DLL in zconf.h [Vollant] +- Add zlib.3 [Rodgers] +- Update RFC URL in deflate.c and algorithm.txt [Mai] +- Add zlib_dll_FAQ.txt to contrib [Truta] +- Add UL to some constants [Truta] +- Update minizip and vstudio [Vollant] +- Remove vestigial NEED_DUMMY_RETURN from zconf.in.h +- Expand use of NO_DUMMY_DECL to avoid all dummy structures +- Added iostream3 to contrib [Schwardt] +- Replace rewind() with fseek() for WinCE [Truta] +- Improve setting of zlib format compression level flags + - Report 0 for huffman and rle strategies and for level == 0 or 1 + - Report 2 only for level == 6 +- Only deal with 64K limit when necessary at compile time [Truta] +- Allow TOO_FAR check to be turned off at compile time [Truta] +- Add gzclearerr() function [Souza] +- Add gzungetc() function + +Changes in 1.2.0.1 (17 March 2003) +- Add Z_RLE strategy for run-length encoding [Truta] + - When Z_RLE requested, restrict matches to distance one + - Update zlib.h, minigzip.c, gzopen(), gzdopen() for Z_RLE +- Correct FASTEST compilation to allow level == 0 +- Clean up what gets compiled for FASTEST +- Incorporate changes to zconf.in.h [Vollant] + - Refine detection of Turbo C need for 
dummy returns + - Refine ZLIB_DLL compilation + - Include additional header file on VMS for off_t typedef +- Try to use _vsnprintf where it supplants vsprintf [Vollant] +- Add some casts in inffast.c +- Enhance comments in zlib.h on what happens if gzprintf() tries to + write more than 4095 bytes before compression +- Remove unused state from inflateBackEnd() +- Remove exit(0) from minigzip.c, example.c +- Get rid of all those darn tabs +- Add "check" target to Makefile.in that does the same thing as "test" +- Add "mostlyclean" and "maintainer-clean" targets to Makefile.in +- Update contrib/inflate86 [Anderson] +- Update contrib/testzlib, contrib/vstudio, contrib/minizip [Vollant] +- Add msdos and win32 directories with makefiles [Truta] +- More additions and improvements to the FAQ + +Changes in 1.2.0 (9 March 2003) +- New and improved inflate code + - About 20% faster + - Does not allocate 32K window unless and until needed + - Automatically detects and decompresses gzip streams + - Raw inflate no longer needs an extra dummy byte at end + - Added inflateBack functions using a callback interface--even faster + than inflate, useful for file utilities (gzip, zip) + - Added inflateCopy() function to record state for random access on + externally generated deflate streams (e.g. 
in gzip files) + - More readable code (I hope) +- New and improved crc32() + - About 50% faster, thanks to suggestions from Rodney Brown +- Add deflateBound() and compressBound() functions +- Fix memory leak in deflateInit2() +- Permit setting dictionary for raw deflate (for parallel deflate) +- Fix const declaration for gzwrite() +- Check for some malloc() failures in gzio.c +- Fix bug in gzopen() on single-byte file 0x1f +- Fix bug in gzread() on concatenated file with 0x1f at end of buffer + and next buffer doesn't start with 0x8b +- Fix uncompress() to return Z_DATA_ERROR on truncated input +- Free memory at end of example.c +- Remove MAX #define in trees.c (conflicted with some libraries) +- Fix static const's in deflate.c, gzio.c, and zutil.[ch] +- Declare malloc() and free() in gzio.c if STDC not defined +- Use malloc() instead of calloc() in zutil.c if int big enough +- Define STDC for AIX +- Add aix/ with approach for compiling shared library on AIX +- Add HP-UX support for shared libraries in configure +- Add OpenUNIX support for shared libraries in configure +- Use $cc instead of gcc to build shared library +- Make prefix directory if needed when installing +- Correct Macintosh avoidance of typedef Byte in zconf.h +- Correct Turbo C memory allocation when under Linux +- Use libz.a instead of -lz in Makefile (assure use of compiled library) +- Update configure to check for snprintf or vsnprintf functions and their + return value, warn during make if using an insecure function +- Fix configure problem with compile-time knowledge of HAVE_UNISTD_H that + is lost when library is used--resolution is to build new zconf.h +- Documentation improvements (in zlib.h): + - Document raw deflate and inflate + - Update RFCs URL + - Point out that zlib and gzip formats are different + - Note that Z_BUF_ERROR is not fatal + - Document string limit for gzprintf() and possible buffer overflow + - Note requirement on avail_out when flushing + - Note permitted values of flush 
parameter of inflate() +- Add some FAQs (and even answers) to the FAQ +- Add contrib/inflate86/ for x86 faster inflate +- Add contrib/blast/ for PKWare Data Compression Library decompression +- Add contrib/puff/ simple inflate for deflate format description + +Changes in 1.1.4 (11 March 2002) +- ZFREE was repeated on same allocation on some error conditions. + This creates a security problem described in + http://www.zlib.org/advisory-2002-03-11.txt +- Returned incorrect error (Z_MEM_ERROR) on some invalid data +- Avoid accesses before window for invalid distances with inflate window + less than 32K. +- force windowBits > 8 to avoid a bug in the encoder for a window size + of 256 bytes. (A complete fix will be available in 1.1.5). + +Changes in 1.1.3 (9 July 1998) +- fix "an inflate input buffer bug that shows up on rare but persistent + occasions" (Mark) +- fix gzread and gztell for concatenated .gz files (Didier Le Botlan) +- fix gzseek(..., SEEK_SET) in write mode +- fix crc check after a gzseek (Frank Faubert) +- fix miniunzip when the last entry in a zip file is itself a zip file + (J Lillge) +- add contrib/asm586 and contrib/asm686 (Brian Raiter) + See http://www.muppetlabs.com/~breadbox/software/assembly.html +- add support for Delphi 3 in contrib/delphi (Bob Dellaca) +- add support for C++Builder 3 and Delphi 3 in contrib/delphi2 (Davide Moretti) +- do not exit prematurely in untgz if 0 at start of block (Magnus Holmgren) +- use macro EXTERN instead of extern to support DLL for BeOS (Sander Stoks) +- added a FAQ file + +- Support gzdopen on Mac with Metrowerks (Jason Linhart) +- Do not redefine Byte on Mac (Brad Pettit & Jason Linhart) +- define SEEK_END too if SEEK_SET is not defined (Albert Chin-A-Young) +- avoid some warnings with Borland C (Tom Tanner) +- fix a problem in contrib/minizip/zip.c for 16-bit MSDOS (Gilles Vollant) +- emulate utime() for WIN32 in contrib/untgz (Gilles Vollant) +- allow several arguments to configure (Tim Mooney, Frodo 
Looijaard) +- use libdir and includedir in Makefile.in (Tim Mooney) +- support shared libraries on OSF1 V4 (Tim Mooney) +- remove so_locations in "make clean" (Tim Mooney) +- fix maketree.c compilation error (Glenn, Mark) +- Python interface to zlib now in Python 1.5 (Jeremy Hylton) +- new Makefile.riscos (Rich Walker) +- initialize static descriptors in trees.c for embedded targets (Nick Smith) +- use "foo-gz" in example.c for RISCOS and VMS (Nick Smith) +- add the OS/2 files in Makefile.in too (Andrew Zabolotny) +- fix fdopen and halloc macros for Microsoft C 6.0 (Tom Lane) +- fix maketree.c to allow clean compilation of inffixed.h (Mark) +- fix parameter check in deflateCopy (Gunther Nikl) +- cleanup trees.c, use compressed_len only in debug mode (Christian Spieler) +- Many portability patches by Christian Spieler: + . zutil.c, zutil.h: added "const" for zmem* + . Make_vms.com: fixed some typos + . Make_vms.com: msdos/Makefile.*: removed zutil.h from some dependency lists + . msdos/Makefile.msc: remove "default rtl link library" info from obj files + . msdos/Makefile.*: use model-dependent name for the built zlib library + . 
msdos/Makefile.emx, nt/Makefile.emx, nt/Makefile.gcc: + new makefiles, for emx (DOS/OS2), emx&rsxnt and mingw32 (Windows 9x / NT) +- use define instead of typedef for Bytef also for MSC small/medium (Tom Lane) +- replace __far with _far for better portability (Christian Spieler, Tom Lane) +- fix test for errno.h in configure (Tim Newsham) + +Changes in 1.1.2 (19 March 98) +- added contrib/minizip, mini zip and unzip based on zlib (Gilles Vollant) + See http://www.winimage.com/zLibDll/unzip.html +- preinitialize the inflate tables for fixed codes, to make the code + completely thread safe (Mark) +- some simplifications and slight speed-up to the inflate code (Mark) +- fix gzeof on non-compressed files (Allan Schrum) +- add -std1 option in configure for OSF1 to fix gzprintf (Martin Mokrejs) +- use default value of 4K for Z_BUFSIZE for 16-bit MSDOS (Tim Wegner + Glenn) +- added os2/Makefile.def and os2/zlib.def (Andrew Zabolotny) +- add shared lib support for UNIX_SV4.2MP (MATSUURA Takanori) +- do not wrap extern "C" around system includes (Tom Lane) +- mention zlib binding for TCL in README (Andreas Kupries) +- added amiga/Makefile.pup for Amiga powerUP SAS/C PPC (Andreas Kleinert) +- allow "make install prefix=..." even after configure (Glenn Randers-Pehrson) +- allow "configure --prefix $HOME" (Tim Mooney) +- remove warnings in example.c and gzio.c (Glenn Randers-Pehrson) +- move Makefile.sas to amiga/Makefile.sas + +Changes in 1.1.1 (27 Feb 98) +- fix macros _tr_tally_* in deflate.h for debug mode (Glenn Randers-Pehrson) +- remove block truncation heuristic which had very marginal effect for zlib + (smaller lit_bufsize than in gzip 1.2.4) and degraded a little the + compression ratio on some files. This also allows inlining _tr_tally for + matches in deflate_slow. 
+- added msdos/Makefile.w32 for WIN32 Microsoft Visual C++ (Bob Frazier) + +Changes in 1.1.0 (24 Feb 98) +- do not return STREAM_END prematurely in inflate (John Bowler) +- revert to the zlib 1.0.8 inflate to avoid the gcc 2.8.0 bug (Jeremy Buhler) +- compile with -DFASTEST to get compression code optimized for speed only +- in minigzip, try mmap'ing the input file first (Miguel Albrecht) +- increase size of I/O buffers in minigzip.c and gzio.c (not a big gain + on Sun but significant on HP) + +- add a pointer to experimental unzip library in README (Gilles Vollant) +- initialize variable gcc in configure (Chris Herborth) + +Changes in 1.0.9 (17 Feb 1998) +- added gzputs and gzgets functions +- do not clear eof flag in gzseek (Mark Diekhans) +- fix gzseek for files in transparent mode (Mark Diekhans) +- do not assume that vsprintf returns the number of bytes written (Jens Krinke) +- replace EXPORT with ZEXPORT to avoid conflict with other programs +- added compress2 in zconf.h, zlib.def, zlib.dnt +- new asm code from Gilles Vollant in contrib/asm386 +- simplify the inflate code (Mark): + . Replace ZALLOC's in huft_build() with single ZALLOC in inflate_blocks_new() + . ZALLOC the length list in inflate_trees_fixed() instead of using stack + . ZALLOC the value area for huft_build() instead of using stack + . 
Simplify Z_FINISH check in inflate() + +- Avoid gcc 2.8.0 comparison bug a little differently than zlib 1.0.8 +- in inftrees.c, avoid cc -O bug on HP (Farshid Elahi) +- in zconf.h move the ZLIB_DLL stuff earlier to avoid problems with + the declaration of FAR (Gilles Vollant) +- install libz.so* with mode 755 (executable) instead of 644 (Marc Lehmann) +- read_buf buf parameter of type Bytef* instead of charf* +- zmemcpy parameters are of type Bytef*, not charf* (Joseph Strout) +- do not redeclare unlink in minigzip.c for WIN32 (John Bowler) +- fix check for presence of directories in "make install" (Ian Willis) + +Changes in 1.0.8 (27 Jan 1998) +- fixed offsets in contrib/asm386/gvmat32.asm (Gilles Vollant) +- fix gzgetc and gzputc for big endian systems (Markus Oberhumer) +- added compress2() to allow setting the compression level +- include sys/types.h to get off_t on some systems (Marc Lehmann & QingLong) +- use constant arrays for the static trees in trees.c instead of computing + them at run time (thanks to Ken Raeburn for this suggestion). To create + trees.h, compile with GEN_TREES_H and run "make test". 
+- check return code of example in "make test" and display result +- pass minigzip command line options to file_compress +- simplifying code of inflateSync to avoid gcc 2.8 bug + +- support CC="gcc -Wall" in configure -s (QingLong) +- avoid a flush caused by ftell in gzopen for write mode (Ken Raeburn) +- fix test for shared library support to avoid compiler warnings +- zlib.lib -> zlib.dll in msdos/zlib.rc (Gilles Vollant) +- check for TARGET_OS_MAC in addition to MACOS (Brad Pettit) +- do not use fdopen for Metrowerks on Mac (Brad Pettit) +- add checks for gzputc and gzputc in example.c +- avoid warnings in gzio.c and deflate.c (Andreas Kleinert) +- use const for the CRC table (Ken Raeburn) +- fixed "make uninstall" for shared libraries +- use Tracev instead of Trace in infblock.c +- in example.c use correct compressed length for test_sync +- suppress +vnocompatwarnings in configure for HPUX (not always supported) + +Changes in 1.0.7 (20 Jan 1998) +- fix gzseek which was broken in write mode +- return error for gzseek to negative absolute position +- fix configure for Linux (Chun-Chung Chen) +- increase stack space for MSC (Tim Wegner) +- get_crc_table and inflateSyncPoint are EXPORTed (Gilles Vollant) +- define EXPORTVA for gzprintf (Gilles Vollant) +- added man page zlib.3 (Rick Rodgers) +- for contrib/untgz, fix makedir() and improve Makefile + +- check gzseek in write mode in example.c +- allocate extra buffer for seeks only if gzseek is actually called +- avoid signed/unsigned comparisons (Tim Wegner, Gilles Vollant) +- add inflateSyncPoint in zconf.h +- fix list of exported functions in nt/zlib.dnt and msdos/zlib.def + +Changes in 1.0.6 (19 Jan 1998) +- add functions gzprintf, gzputc, gzgetc, gztell, gzeof, gzseek, gzrewind and + gzsetparams (thanks to Roland Giersig and Kevin Ruland for some of this code) +- Fix a deflate bug occurring only with compression level 0 (thanks to + Andy Buckler for finding this one). 
+- In minigzip, pass transparently also the first byte for .Z files. +- return Z_BUF_ERROR instead of Z_OK if output buffer full in uncompress() +- check Z_FINISH in inflate (thanks to Marc Schluper) +- Implement deflateCopy (thanks to Adam Costello) +- make static libraries by default in configure, add --shared option. +- move MSDOS or Windows specific files to directory msdos +- suppress the notion of partial flush to simplify the interface + (but the symbol Z_PARTIAL_FLUSH is kept for compatibility with 1.0.4) +- suppress history buffer provided by application to simplify the interface + (this feature was not implemented anyway in 1.0.4) +- next_in and avail_in must be initialized before calling inflateInit or + inflateInit2 +- add EXPORT in all exported functions (for Windows DLL) +- added Makefile.nt (thanks to Stephen Williams) +- added the unsupported "contrib" directory: + contrib/asm386/ by Gilles Vollant + 386 asm code replacing longest_match(). + contrib/iostream/ by Kevin Ruland + A C++ I/O streams interface to the zlib gz* functions + contrib/iostream2/ by Tyge Løvset + Another C++ I/O streams interface + contrib/untgz/ by "Pedro A. Aranda Gutiérrez" + A very simple tar.gz file extractor using zlib + contrib/visual-basic.txt by Carlos Rios + How to use compress(), uncompress() and the gz* functions from VB. +- pass params -f (filtered data), -h (huffman only), -1 to -9 (compression + level) in minigzip (thanks to Tom Lane) + +- use const for rommable constants in deflate +- added test for gzseek and gztell in example.c +- add undocumented function inflateSyncPoint() (hack for Paul Mackerras) +- add undocumented function zError to convert error code to string + (for Tim Smithers) +- Allow compilation of gzio with -DNO_DEFLATE to avoid the compression code. +- Use default memcpy for Symantec MSDOS compiler. 
+- Add EXPORT keyword for check_func (needed for Windows DLL) +- add current directory to LD_LIBRARY_PATH for "make test" +- create also a link for libz.so.1 +- added support for FUJITSU UXP/DS (thanks to Toshiaki Nomura) +- use $(SHAREDLIB) instead of libz.so in Makefile.in (for HPUX) +- added -soname for Linux in configure (Chun-Chung Chen, +- assign numbers to the exported functions in zlib.def (for Windows DLL) +- add advice in zlib.h for best usage of deflateSetDictionary +- work around compiler bug on Atari (cast Z_NULL in call of s->checkfn) +- allow compilation with ANSI keywords only enabled for TurboC in large model +- avoid "versionString"[0] (Borland bug) +- add NEED_DUMMY_RETURN for Borland +- use variable z_verbose for tracing in debug mode (L. Peter Deutsch). +- allow compilation with CC +- defined STDC for OS/2 (David Charlap) +- limit external names to 8 chars for MVS (Thomas Lund) +- in minigzip.c, use static buffers only for 16-bit systems +- fix suffix check for "minigzip -d foo.gz" +- do not return an error for the 2nd of two consecutive gzflush() (Felix Lee) +- use _fdopen instead of fdopen for MSC >= 6.0 (Thomas Fanslau) +- added makelcc.bat for lcc-win32 (Tom St Denis) +- in Makefile.dj2, use copy and del instead of install and rm (Frank Donahoe) +- Avoid expanded $Id$. Use "rcs -kb" or "cvs admin -kb" to avoid Id expansion. 
+- check for unistd.h in configure (for off_t) +- remove useless check parameter in inflate_blocks_free +- avoid useless assignment of s->check to itself in inflate_blocks_new +- do not flush twice in gzclose (thanks to Ken Raeburn) +- rename FOPEN as F_OPEN to avoid clash with /usr/include/sys/file.h +- use NO_ERRNO_H instead of enumeration of operating systems with errno.h +- work around buggy fclose on pipes for HP/UX +- support zlib DLL with BORLAND C++ 5.0 (thanks to Glenn Randers-Pehrson) +- fix configure if CC is already equal to gcc + +Changes in 1.0.5 (3 Jan 98) +- Fix inflate to terminate gracefully when fed corrupted or invalid data +- Use const for rommable constants in inflate +- Eliminate memory leaks on error conditions in inflate +- Removed some vestigial code in inflate +- Update web address in README + +Changes in 1.0.4 (24 Jul 96) +- In very rare conditions, deflate(s, Z_FINISH) could fail to produce an EOF + bit, so the decompressor could decompress all the correct data but went + on to attempt decompressing extra garbage data. This affected minigzip too. +- zlibVersion and gzerror return const char* (needed for DLL) +- port to RISCOS (no fdopen, no multiple dots, no unlink, no fileno) +- use z_error only for DEBUG (avoid problem with DLLs) + +Changes in 1.0.3 (2 Jul 96) +- use z_streamp instead of z_stream *, which is now a far pointer in MSDOS + small and medium models; this makes the library incompatible with previous + versions for these models. (No effect in large model or on other systems.) +- return OK instead of BUF_ERROR if previous deflate call returned with + avail_out as zero but there is nothing to do +- added memcmp for non STDC compilers +- define NO_DUMMY_DECL for more Mac compilers (.h files merged incorrectly) +- define __32BIT__ if __386__ or i386 is defined (pb. 
with Watcom and SCO) +- better check for 16-bit mode MSC (avoids problem with Symantec) + +Changes in 1.0.2 (23 May 96) +- added Windows DLL support +- added a function zlibVersion (for the DLL support) +- fixed declarations using Bytef in infutil.c (pb with MSDOS medium model) +- Bytef is define'd instead of typedef'd only for Borland C +- avoid reading uninitialized memory in example.c +- mention in README that the zlib format is now RFC1950 +- updated Makefile.dj2 +- added algorithm.doc + +Changes in 1.0.1 (20 May 96) [1.0 skipped to avoid confusion] +- fix array overlay in deflate.c which sometimes caused bad compressed data +- fix inflate bug with empty stored block +- fix MSDOS medium model which was broken in 0.99 +- fix deflateParams() which could generate bad compressed data. +- Bytef is define'd instead of typedef'ed (work around Borland bug) +- added an INDEX file +- new makefiles for DJGPP (Makefile.dj2), 32-bit Borland (Makefile.b32), + Watcom (Makefile.wat), Amiga SAS/C (Makefile.sas) +- speed up adler32 for modern machines without auto-increment +- added -ansi for IRIX in configure +- static_init_done in trees.c is an int +- define unlink as delete for VMS +- fix configure for QNX +- add configure branch for SCO and HPUX +- avoid many warnings (unused variables, dead assignments, etc...) +- no fdopen for BeOS +- fix the Watcom fix for 32 bit mode (define FAR as empty) +- removed redefinition of Byte for MWERKS +- work around an MWERKS bug (incorrect merge of all .h files) + +Changes in 0.99 (27 Jan 96) +- allow preset dictionary shared between compressor and decompressor +- allow compression level 0 (no compression) +- add deflateParams in zlib.h: allow dynamic change of compression level + and compression strategy. 
+- test large buffers and deflateParams in example.c +- add optional "configure" to build zlib as a shared library +- suppress Makefile.qnx, use configure instead +- fixed deflate for 64-bit systems (detected on Cray) +- fixed inflate_blocks for 64-bit systems (detected on Alpha) +- declare Z_DEFLATED in zlib.h (possible parameter for deflateInit2) +- always return Z_BUF_ERROR when deflate() has nothing to do +- deflateInit and inflateInit are now macros to allow version checking +- prefix all global functions and types with z_ with -DZ_PREFIX +- make falloc completely reentrant (inftrees.c) +- fixed very unlikely race condition in ct_static_init +- free in reverse order of allocation to help memory manager +- use zlib-1.0/* instead of zlib/* inside the tar.gz +- make zlib warning-free with "gcc -O3 -Wall -Wwrite-strings -Wpointer-arith + -Wconversion -Wstrict-prototypes -Wmissing-prototypes" +- allow gzread on concatenated .gz files +- deflateEnd now returns Z_DATA_ERROR if it was premature +- deflate is finally (?) 
fully deterministic (no matches beyond end of input) +- Document Z_SYNC_FLUSH +- add uninstall in Makefile +- Check for __cplusplus in zlib.h +- Better test in ct_align for partial flush +- avoid harmless warnings for Borland C++ +- initialize hash_head in deflate.c +- avoid warning on fdopen (gzio.c) for HP cc -Aa +- include stdlib.h for STDC compilers +- include errno.h for Cray +- ignore error if ranlib doesn't exist +- call ranlib twice for NeXTSTEP +- use exec_prefix instead of prefix for libz.a +- renamed ct_* as _tr_* to avoid conflict with applications +- clear z->msg in inflateInit2 before any error return +- initialize opaque in example.c, gzio.c, deflate.c and inflate.c +- fixed typo in zconf.h (_GNUC__ => __GNUC__) +- check for WIN32 in zconf.h and zutil.c (avoid farmalloc in 32-bit mode) +- fix typo in Make_vms.com (f$trnlnm -> f$getsyi) +- in fcalloc, normalize pointer if size > 65520 bytes +- don't use special fcalloc for 32 bit Borland C++ +- use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc... +- use Z_BINARY instead of BINARY +- document that gzclose after gzdopen will close the file +- allow "a" as mode in gzopen. +- fix error checking in gzread +- allow skipping .gz extra-field on pipes +- added reference to Perl interface in README +- put the crc table in FAR data (I dislike more and more the medium model :) +- added get_crc_table +- added a dimension to all arrays (Borland C can't count). +- workaround Borland C bug in declaration of inflate_codes_new & inflate_fast +- guard against multiple inclusion of *.h (for precompiled header on Mac) +- Watcom C pretends to be Microsoft C small model even in 32 bit mode. +- don't use unsized arrays to avoid silly warnings by Visual C++: + warning C4746: 'inflate_mask' : unsized array treated as '__far' + (what's wrong with far data in far model?). 
+- define enum out of inflate_blocks_state to allow compilation with C++ + +Changes in 0.95 (16 Aug 95) +- fix MSDOS small and medium model (now easier to adapt to any compiler) +- inlined send_bits +- fix the final (:-) bug for deflate with flush (output was correct but + not completely flushed in rare occasions). +- default window size is same for compression and decompression + (it's now sufficient to set MAX_WBITS in zconf.h). +- voidp -> voidpf and voidnp -> voidp (for consistency with other + typedefs and because voidnp was not near in large model). + +Changes in 0.94 (13 Aug 95) +- support MSDOS medium model +- fix deflate with flush (could sometimes generate bad output) +- fix deflateReset (zlib header was incorrectly suppressed) +- added support for VMS +- allow a compression level in gzopen() +- gzflush now calls fflush +- For deflate with flush, flush even if no more input is provided. +- rename libgz.a as libz.a +- avoid complex expression in infcodes.c triggering Turbo C bug +- work around a problem with gcc on Alpha (in INSERT_STRING) +- don't use inline functions (problem with some gcc versions) +- allow renaming of Byte, uInt, etc... with #define. +- avoid warning about (unused) pointer before start of array in deflate.c +- avoid various warnings in gzio.c, example.c, infblock.c, adler32.c, zutil.c +- avoid reserved word 'new' in trees.c + +Changes in 0.93 (25 June 95) +- temporarily disable inline functions +- make deflate deterministic +- give enough lookahead for PARTIAL_FLUSH +- Set binary mode for stdin/stdout in minigzip.c for OS/2 +- don't even use signed char in inflate (not portable enough) +- fix inflate memory leak for segmented architectures + +Changes in 0.92 (3 May 95) +- don't assume that char is signed (problem on SGI) +- Clear bit buffer when starting a stored block +- no memcpy on Pyramid +- suppressed inftest.c +- optimized fill_window, put longest_match inline for gcc +- optimized inflate on stored blocks. 
+- untabify all sources to simplify patches + +Changes in 0.91 (2 May 95) +- Default MEM_LEVEL is 8 (not 9 for Unix) as documented in zlib.h +- Document the memory requirements in zconf.h +- added "make install" +- fix sync search logic in inflateSync +- deflate(Z_FULL_FLUSH) now works even if output buffer too short +- after inflateSync, don't scare people with just "lo world" +- added support for DJGPP + +Changes in 0.9 (1 May 95) +- don't assume that zalloc clears the allocated memory (the TurboC bug + was Mark's bug after all :) +- let again gzread copy uncompressed data unchanged (was working in 0.71) +- deflate(Z_FULL_FLUSH), inflateReset and inflateSync are now fully implemented +- added a test of inflateSync in example.c +- moved MAX_WBITS to zconf.h because users might want to change that. +- document explicitly that zalloc(64K) on MSDOS must return a normalized + pointer (zero offset) +- added Makefiles for Microsoft C, Turbo C, Borland C++ +- faster crc32() + +Changes in 0.8 (29 April 95) +- added fast inflate (inffast.c) +- deflate(Z_FINISH) now returns Z_STREAM_END when done. Warning: this + is incompatible with previous versions of zlib which returned Z_OK. +- work around a TurboC compiler bug (bad code for b << 0, see infutil.h) + (actually that was not a compiler bug, see 0.81 above) +- gzread no longer reads one extra byte in certain cases +- In gzio destroy(), don't reference a freed structure +- avoid many warnings for MSDOS +- avoid the ERROR symbol which is used by MS Windows + +Changes in 0.71 (14 April 95) +- Fixed more MSDOS compilation problems :( There is still a bug with + TurboC large model. + +Changes in 0.7 (14 April 95) +- Added full inflate support. +- Simplified the crc32() interface. The pre- and post-conditioning + (one's complement) is now done inside crc32(). WARNING: this is + incompatible with previous versions; see zlib.h for the new usage. + +Changes in 0.61 (12 April 95) +- workaround for a bug in TurboC. 
example and minigzip now work on MSDOS. + +Changes in 0.6 (11 April 95) +- added minigzip.c +- added gzdopen to reopen a file descriptor as gzFile +- added transparent reading of non-gzipped files in gzread. +- fixed bug in gzread (don't read crc as data) +- fixed bug in destroy (gzio.c) (don't return Z_STREAM_END for gzclose). +- don't allocate big arrays in the stack (for MSDOS) +- fix some MSDOS compilation problems + +Changes in 0.5: +- do real compression in deflate.c. Z_PARTIAL_FLUSH is supported but + not yet Z_FULL_FLUSH. +- support decompression but only in a single step (forced Z_FINISH) +- added opaque object for zalloc and zfree. +- added deflateReset and inflateReset +- added a variable zlib_version for consistency checking. +- renamed the 'filter' parameter of deflateInit2 as 'strategy'. + Added Z_FILTERED and Z_HUFFMAN_ONLY constants. + +Changes in 0.4: +- avoid "zip" everywhere, use zlib instead of ziplib. +- suppress Z_BLOCK_FLUSH, interpret Z_PARTIAL_FLUSH as block flush + if compression method == 8. +- added adler32 and crc32 +- renamed deflateOptions as deflateInit2, call one or the other but not both +- added the method parameter for deflateInit2. +- added inflateInit2 +- simplified considerably deflateInit and inflateInit by not supporting + user-provided history buffer. This is supported only in deflateInit2 + and inflateInit2. + +Changes in 0.3: +- prefix all macro names with Z_ +- use Z_FINISH instead of deflateEnd to finish compression. +- added Z_HUFFMAN_ONLY +- added gzerror() Added: external/zlib/FAQ ============================================================================== --- (empty file) +++ external/zlib/FAQ Tue Jan 3 07:42:59 2006 @@ -0,0 +1,339 @@ + + Frequently Asked Questions about zlib + + +If your question is not there, please check the zlib home page +http://www.zlib.org which may have more recent information. +The latest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html + + + 1. Is zlib Y2K-compliant? + + Yes. 
zlib doesn't handle dates. + + 2. Where can I get a Windows DLL version? + + The zlib sources can be compiled without change to produce a DLL. + See the file win32/DLL_FAQ.txt in the zlib distribution. + Pointers to the precompiled DLL are found in the zlib web site at + http://www.zlib.org. + + 3. Where can I get a Visual Basic interface to zlib? + + See + * http://www.dogma.net/markn/articles/zlibtool/zlibtool.htm + * contrib/visual-basic.txt in the zlib distribution + * win32/DLL_FAQ.txt in the zlib distribution + + 4. compress() returns Z_BUF_ERROR. + + Make sure that before the call of compress, the length of the compressed + buffer is equal to the total size of the compressed buffer and not + zero. For Visual Basic, check that this parameter is passed by reference + ("as any"), not by value ("as long"). + + 5. deflate() or inflate() returns Z_BUF_ERROR. + + Before making the call, make sure that avail_in and avail_out are not + zero. When setting the parameter flush equal to Z_FINISH, also make sure + that avail_out is big enough to allow processing all pending input. + Note that a Z_BUF_ERROR is not fatal--another call to deflate() or + inflate() can be made with more input or output space. A Z_BUF_ERROR + may in fact be unavoidable depending on how the functions are used, since + it is not possible to tell whether or not there is more output pending + when strm.avail_out returns with zero. + + 6. Where's the zlib documentation (man pages, etc.)? + + It's in zlib.h for the moment, and Francis S. Lin has converted it to a + web page zlib.html. Volunteers to transform this to Unix-style man pages, + please contact us (zlib at gzip.org). Examples of zlib usage are in the files + example.c and minigzip.c. + + 7. Why don't you use GNU autoconf or libtool or ...? + + Because we would like to keep zlib as a very small and simple + package. zlib is rather portable and doesn't need much configuration. + + 8. I found a bug in zlib. 
+ + Most of the time, such problems are due to an incorrect usage of + zlib. Please try to reproduce the problem with a small program and send + the corresponding source to us at zlib at gzip.org . Do not send + multi-megabyte data files without prior agreement. + + 9. Why do I get "undefined reference to gzputc"? + + If "make test" produces something like + + example.o(.text+0x154): undefined reference to `gzputc' + + check that you don't have old files libz.* in /usr/lib, /usr/local/lib or + /usr/X11R6/lib. Remove any old versions, then do "make install". + +10. I need a Delphi interface to zlib. + + See the contrib/delphi directory in the zlib distribution. + +11. Can zlib handle .zip archives? + + Not by itself, no. See the directory contrib/minizip in the zlib + distribution. + +12. Can zlib handle .Z files? + + No, sorry. You have to spawn an uncompress or gunzip subprocess, or adapt + the code of uncompress on your own. + +13. How can I make a Unix shared library? + + make clean + ./configure -s + make + +14. How do I install a shared zlib library on Unix? + + After the above, then: + + make install + + However, many flavors of Unix come with a shared zlib already installed. + Before going to the trouble of compiling a shared version of zlib and + trying to install it, you may want to check if it's already there! If you + can #include <zlib.h>, it's there. The -lz option will probably link to it. + +15. I have a question about OttoPDF. + + We are not the authors of OttoPDF. The real author is on the OttoPDF web + site: Joel Hainley, jhainley at myndkryme.com. + +16. Can zlib decode Flate data in an Adobe PDF file? + + Yes. See http://www.fastio.com/ (ClibPDF), or http://www.pdflib.com/ . + To modify PDF forms, see http://sourceforge.net/projects/acroformtool/ . + +17. Why am I getting this "register_frame_info not found" error on Solaris? 
+ + After installing zlib 1.1.4 on Solaris 2.6, running applications using zlib + generates an error such as: + + ld.so.1: rpm: fatal: relocation error: file /usr/local/lib/libz.so: + symbol __register_frame_info: referenced symbol not found + + The symbol __register_frame_info is not part of zlib, it is generated by + the C compiler (cc or gcc). You must recompile applications using zlib + which have this problem. This problem is specific to Solaris. See + http://www.sunfreeware.com for Solaris versions of zlib and applications + using zlib. + +18. Why does gzip give an error on a file I make with compress/deflate? + + The compress and deflate functions produce data in the zlib format, which + is different and incompatible with the gzip format. The gz* functions in + zlib on the other hand use the gzip format. Both the zlib and gzip + formats use the same compressed data format internally, but have different + headers and trailers around the compressed data. + +19. Ok, so why are there two different formats? + + The gzip format was designed to retain the directory information about + a single file, such as the name and last modification date. The zlib + format on the other hand was designed for in-memory and communication + channel applications, and has a much more compact header and trailer and + uses a faster integrity check than gzip. + +20. Well that's nice, but how do I make a gzip file in memory? + + You can request that deflate write the gzip format instead of the zlib + format using deflateInit2(). You can also request that inflate decode + the gzip format using inflateInit2(). Read zlib.h for more details. + +21. Is zlib thread-safe? + + Yes. However any library routines that zlib uses and any application- + provided memory allocation routines must also be thread-safe. zlib's gz* + functions use stdio library routines, and most of zlib's functions use the + library memory allocation routines by default. 
zlib's Init functions allow + for the application to provide custom memory allocation routines. + + Of course, you should only operate on any given zlib or gzip stream from a + single thread at a time. + +22. Can I use zlib in my commercial application? + + Yes. Please read the license in zlib.h. + +23. Is zlib under the GNU license? + + No. Please read the license in zlib.h. + +24. The license says that altered source versions must be "plainly marked". So + what exactly do I need to do to meet that requirement? + + You need to change the ZLIB_VERSION and ZLIB_VERNUM #defines in zlib.h. In + particular, the final version number needs to be changed to "f", and an + identification string should be appended to ZLIB_VERSION. Version numbers + x.x.x.f are reserved for modifications to zlib by others than the zlib + maintainers. For example, if the version of the base zlib you are altering + is "1.2.3.4", then in zlib.h you should change ZLIB_VERNUM to 0x123f, and + ZLIB_VERSION to something like "1.2.3.f-zachary-mods-v3". You can also + update the version strings in deflate.c and inftrees.c. + + For altered source distributions, you should also note the origin and + nature of the changes in zlib.h, as well as in ChangeLog and README, along + with the dates of the alterations. The origin should include at least your + name (or your company's name), and an email address to contact for help or + issues with the library. + + Note that distributing a compiled zlib library along with zlib.h and + zconf.h is also a source distribution, and so you should change + ZLIB_VERSION and ZLIB_VERNUM and note the origin and nature of the changes + in zlib.h as you would for a full source distribution. + +25. Will zlib work on a big-endian or little-endian architecture, and can I + exchange compressed data between them? + + Yes and yes. + +26. Will zlib work on a 64-bit machine? + + It should. 
It has been tested on 64-bit machines, and has no dependence + on any data types being limited to 32-bits in length. If you have any + difficulties, please provide a complete problem report to zlib at gzip.org + +27. Will zlib decompress data from the PKWare Data Compression Library? + + No. The PKWare DCL uses a completely different compressed data format + than does PKZIP and zlib. However, you can look in zlib's contrib/blast + directory for a possible solution to your problem. + +28. Can I access data randomly in a compressed stream? + + No, not without some preparation. If when compressing you periodically + use Z_FULL_FLUSH, carefully write all the pending data at those points, + and keep an index of those locations, then you can start decompression + at those points. You have to be careful to not use Z_FULL_FLUSH too + often, since it can significantly degrade compression. + +29. Does zlib work on MVS, OS/390, CICS, etc.? + + We don't know for sure. We have heard occasional reports of success on + these systems. If you do use it on one of these, please provide us with + a report, instructions, and patches that we can reference when we get + these questions. Thanks. + +30. Is there some simpler, easier to read version of inflate I can look at + to understand the deflate format? + + First off, you should read RFC 1951. Second, yes. Look in zlib's + contrib/puff directory. + +31. Does zlib infringe on any patents? + + As far as we know, no. In fact, that was originally the whole point behind + zlib. Look here for some more information: + + http://www.gzip.org/#faq11 + +32. Can zlib work with greater than 4 GB of data? + + Yes. inflate() and deflate() will process any amount of data correctly. + Each call of inflate() or deflate() is limited to input and output chunks + of the maximum value that can be stored in the compiler's "unsigned int" + type, but there is no limit to the number of chunks. 
Note however that the + strm.total_in and strm.total_out counters may be limited to 4 GB. These + counters are provided as a convenience and are not used internally by + inflate() or deflate(). The application can easily set up its own counters + updated after each call of inflate() or deflate() to count beyond 4 GB. + compress() and uncompress() may be limited to 4 GB, since they operate in a + single call. gzseek() and gztell() may be limited to 4 GB depending on how + zlib is compiled. See the zlibCompileFlags() function in zlib.h. + + The word "may" appears several times above since there is a 4 GB limit + only if the compiler's "long" type is 32 bits. If the compiler's "long" + type is 64 bits, then the limit is 16 exabytes. + +33. Does zlib have any security vulnerabilities? + + The only one that we are aware of is potentially in gzprintf(). If zlib + is compiled to use sprintf() or vsprintf(), then there is no protection + against a buffer overflow of a 4K string space, other than the caller of + gzprintf() assuring that the output will not exceed 4K. On the other + hand, if zlib is compiled to use snprintf() or vsnprintf(), which should + normally be the case, then there is no vulnerability. The ./configure + script will display warnings if an insecure variation of sprintf() will + be used by gzprintf(). Also the zlibCompileFlags() function will return + information on what variant of sprintf() is used by gzprintf(). + + If you don't have snprintf() or vsnprintf() and would like one, you can + find a portable implementation here: + + http://www.ijs.si/software/snprintf/ + + Note that you should be using the most recent version of zlib. Versions + 1.1.3 and before were subject to a double-free vulnerability. + +34. Is there a Java version of zlib? + + Probably what you want is to use zlib in Java. zlib is already included + as part of the Java SDK in the java.util.zip package. 
If you really want + a version of zlib written in the Java language, look on the zlib home + page for links: http://www.zlib.org/ + +35. I get this or that compiler or source-code scanner warning when I crank it + up to maximally-pedantic. Can't you guys write proper code? + + Many years ago, we gave up attempting to avoid warnings on every compiler + in the universe. It just got to be a waste of time, and some compilers + were downright silly. So now, we simply make sure that the code always + works. + +36. Valgrind (or some similar memory access checker) says that deflate is + performing a conditional jump that depends on an uninitialized value. + Isn't that a bug? + + No. That is intentional for performance reasons, and the output of + deflate is not affected. This only started showing up recently since + zlib 1.2.x uses malloc() by default for allocations, whereas earlier + versions used calloc(), which zeros out the allocated memory. + +37. Will zlib read the (insert any ancient or arcane format here) compressed + data format? + + Probably not. Look in the comp.compression FAQ for pointers to various + formats and associated software. + +38. How can I encrypt/decrypt zip files with zlib? + + zlib doesn't support encryption. The original PKZIP encryption is very weak + and can be broken with freely available programs. To get strong encryption, + use GnuPG, http://www.gnupg.org/ , which already includes zlib compression. + For PKZIP compatible "encryption", look at http://www.info-zip.org/ + +39. What's the difference between the "gzip" and "deflate" HTTP 1.1 encodings? + + "gzip" is the gzip format, and "deflate" is the zlib format. They should + probably have called the second one "zlib" instead to avoid confusion + with the raw deflate compressed data format. 
While the HTTP 1.1 RFC 2616 + correctly points to the zlib specification in RFC 1950 for the "deflate" + transfer encoding, there have been reports of servers and browsers that + incorrectly produce or expect raw deflate data per the deflate + specification in RFC 1951, most notably Microsoft. So even though the + "deflate" transfer encoding using the zlib format would be the more + efficient approach (and in fact exactly what the zlib format was designed + for), using the "gzip" transfer encoding is probably more reliable due to + an unfortunate choice of name on the part of the HTTP 1.1 authors. + + Bottom line: use the gzip format for HTTP 1.1 encoding. + +40. Does zlib support the new "Deflate64" format introduced by PKWare? + + No. PKWare has apparently decided to keep that format proprietary, since + they have not documented it as they have previous compression formats. + In any case, the compression improvements are so modest compared to other + more modern approaches, that it's not worth the effort to implement. + +41. Can you please sign these lengthy legal documents and fax them back to us + so that we can use your software in our product? + + No. Go away. Shoo. 
Added: external/zlib/INDEX ============================================================================== --- (empty file) +++ external/zlib/INDEX Tue Jan 3 07:42:59 2006 @@ -0,0 +1,51 @@ +ChangeLog history of changes +FAQ Frequently Asked Questions about zlib +INDEX this file +Makefile makefile for Unix (generated by configure) +Makefile.in makefile for Unix (template for configure) +README guess what +algorithm.txt description of the (de)compression algorithm +configure configure script for Unix +zconf.in.h template for zconf.h (used by configure) + +amiga/ makefiles for Amiga SAS C +as400/ makefiles for IBM AS/400 +msdos/ makefiles for MSDOS +old/ makefiles for various architectures and zlib documentation + files that have not yet been updated for zlib 1.2.x +projects/ projects for various Integrated Development Environments +qnx/ makefiles for QNX +win32/ makefiles for Windows + + zlib public header files (must be kept): +zconf.h +zlib.h + + private source files used to build the zlib library: +adler32.c +compress.c +crc32.c +crc32.h +deflate.c +deflate.h +gzio.c +infback.c +inffast.c +inffast.h +inffixed.h +inflate.c +inflate.h +inftrees.c +inftrees.h +trees.c +trees.h +uncompr.c +zutil.c +zutil.h + + source files for sample programs: +example.c +minigzip.c + + unsupported contribution by third parties +See contrib/README.contrib Added: external/zlib/Makefile ============================================================================== --- (empty file) +++ external/zlib/Makefile Tue Jan 3 07:42:59 2006 @@ -0,0 +1,154 @@ +# Makefile for zlib +# Copyright (C) 1995-2005 Jean-loup Gailly. 
+# For conditions of distribution and use, see copyright notice in zlib.h + +# To compile and test, type: +# ./configure; make test +# The call of configure is optional if you don't have special requirements +# If you wish to build zlib as a shared library, use: ./configure -s + +# To use the asm code, type: +# cp contrib/asm?86/match.S ./match.S +# make LOC=-DASMV OBJA=match.o + +# To install /usr/local/lib/libz.* and /usr/local/include/zlib.h, type: +# make install +# To install in $HOME instead of /usr/local, use: +# make install prefix=$HOME + +CC=cc + +CFLAGS=-O +#CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7 +#CFLAGS=-g -DDEBUG +#CFLAGS=-O3 -Wall -Wwrite-strings -Wpointer-arith -Wconversion \ +# -Wstrict-prototypes -Wmissing-prototypes + +LDFLAGS=libz.a +LDSHARED=$(CC) +CPP=$(CC) -E + +LIBS=libz.a +SHAREDLIB=libz.so +SHAREDLIBV=libz.so.1.2.3 +SHAREDLIBM=libz.so.1 + +AR=ar rc +RANLIB=ranlib +TAR=tar +SHELL=/bin/sh +EXE= + +prefix = /usr/local +exec_prefix = ${prefix} +libdir = ${exec_prefix}/lib +includedir = ${prefix}/include +mandir = ${prefix}/share/man +man3dir = ${mandir}/man3 + +OBJS = adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \ + zutil.o inflate.o infback.o inftrees.o inffast.o + +OBJA = +# to use the asm code: make OBJA=match.o + +TEST_OBJS = example.o minigzip.o + +all: example$(EXE) minigzip$(EXE) + +check: test +test: all + @LD_LIBRARY_PATH=.:$(LD_LIBRARY_PATH) ; export LD_LIBRARY_PATH; \ + echo hello world | ./minigzip | ./minigzip -d || \ + echo ' *** minigzip test FAILED ***' ; \ + if ./example; then \ + echo ' *** zlib test OK ***'; \ + else \ + echo ' *** zlib test FAILED ***'; \ + fi + +libz.a: $(OBJS) $(OBJA) + $(AR) $@ $(OBJS) $(OBJA) + -@ ($(RANLIB) $@ || true) >/dev/null 2>&1 + +match.o: match.S + $(CPP) match.S > _match.s + $(CC) -c _match.s + mv _match.o match.o + rm -f _match.s + +$(SHAREDLIBV): $(OBJS) + $(LDSHARED) -o $@ $(OBJS) + rm -f $(SHAREDLIB) $(SHAREDLIBM) + ln -s $@ $(SHAREDLIB) + ln -s $@ $(SHAREDLIBM) + 
+example$(EXE): example.o $(LIBS) + $(CC) $(CFLAGS) -o $@ example.o $(LDFLAGS) + +minigzip$(EXE): minigzip.o $(LIBS) + $(CC) $(CFLAGS) -o $@ minigzip.o $(LDFLAGS) + +install: $(LIBS) + - at if [ ! -d $(exec_prefix) ]; then mkdir -p $(exec_prefix); fi + - at if [ ! -d $(includedir) ]; then mkdir -p $(includedir); fi + - at if [ ! -d $(libdir) ]; then mkdir -p $(libdir); fi + - at if [ ! -d $(man3dir) ]; then mkdir -p $(man3dir); fi + cp zlib.h zconf.h $(includedir) + chmod 644 $(includedir)/zlib.h $(includedir)/zconf.h + cp $(LIBS) $(libdir) + cd $(libdir); chmod 755 $(LIBS) + -@(cd $(libdir); $(RANLIB) libz.a || true) >/dev/null 2>&1 + cd $(libdir); if test -f $(SHAREDLIBV); then \ + rm -f $(SHAREDLIB) $(SHAREDLIBM); \ + ln -s $(SHAREDLIBV) $(SHAREDLIB); \ + ln -s $(SHAREDLIBV) $(SHAREDLIBM); \ + (ldconfig || true) >/dev/null 2>&1; \ + fi + cp zlib.3 $(man3dir) + chmod 644 $(man3dir)/zlib.3 +# The ranlib in install is needed on NeXTSTEP which checks file times +# ldconfig is for Linux + +uninstall: + cd $(includedir); \ + cd $(libdir); rm -f libz.a; \ + if test -f $(SHAREDLIBV); then \ + rm -f $(SHAREDLIBV) $(SHAREDLIB) $(SHAREDLIBM); \ + fi + cd $(man3dir); rm -f zlib.3 + +mostlyclean: clean +clean: + rm -f *.o *~ example$(EXE) minigzip$(EXE) \ + libz.* foo.gz so_locations \ + _match.s maketree contrib/infback9/*.o + +maintainer-clean: distclean +distclean: clean + cp -p Makefile.in Makefile + cp -p zconf.in.h zconf.h + rm -f .DS_Store + +tags: + etags *.[ch] + +depend: + makedepend -- $(CFLAGS) -- *.[ch] + +# DO NOT DELETE THIS LINE -- make depend depends on it. 
+ +adler32.o: zlib.h zconf.h +compress.o: zlib.h zconf.h +crc32.o: crc32.h zlib.h zconf.h +deflate.o: deflate.h zutil.h zlib.h zconf.h +example.o: zlib.h zconf.h +gzio.o: zutil.h zlib.h zconf.h +inffast.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +inflate.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +infback.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +inftrees.o: zutil.h zlib.h zconf.h inftrees.h +minigzip.o: zlib.h zconf.h +trees.o: deflate.h zutil.h zlib.h zconf.h trees.h +uncompr.o: zlib.h zconf.h +zutil.o: zutil.h zlib.h zconf.h Added: external/zlib/Makefile.in ============================================================================== --- (empty file) +++ external/zlib/Makefile.in Tue Jan 3 07:42:59 2006 @@ -0,0 +1,154 @@ +# Makefile for zlib +# Copyright (C) 1995-2005 Jean-loup Gailly. +# For conditions of distribution and use, see copyright notice in zlib.h + +# To compile and test, type: +# ./configure; make test +# The call of configure is optional if you don't have special requirements +# If you wish to build zlib as a shared library, use: ./configure -s + +# To use the asm code, type: +# cp contrib/asm?86/match.S ./match.S +# make LOC=-DASMV OBJA=match.o + +# To install /usr/local/lib/libz.* and /usr/local/include/zlib.h, type: +# make install +# To install in $HOME instead of /usr/local, use: +# make install prefix=$HOME + +CC=cc + +CFLAGS=-O +#CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7 +#CFLAGS=-g -DDEBUG +#CFLAGS=-O3 -Wall -Wwrite-strings -Wpointer-arith -Wconversion \ +# -Wstrict-prototypes -Wmissing-prototypes + +LDFLAGS=libz.a +LDSHARED=$(CC) +CPP=$(CC) -E + +LIBS=libz.a +SHAREDLIB=libz.so +SHAREDLIBV=libz.so.1.2.3 +SHAREDLIBM=libz.so.1 + +AR=ar rc +RANLIB=ranlib +TAR=tar +SHELL=/bin/sh +EXE= + +prefix = /usr/local +exec_prefix = ${prefix} +libdir = ${exec_prefix}/lib +includedir = ${prefix}/include +mandir = ${prefix}/share/man +man3dir = ${mandir}/man3 + +OBJS = adler32.o compress.o crc32.o gzio.o 
uncompr.o deflate.o trees.o \ + zutil.o inflate.o infback.o inftrees.o inffast.o + +OBJA = +# to use the asm code: make OBJA=match.o + +TEST_OBJS = example.o minigzip.o + +all: example$(EXE) minigzip$(EXE) + +check: test +test: all + @LD_LIBRARY_PATH=.:$(LD_LIBRARY_PATH) ; export LD_LIBRARY_PATH; \ + echo hello world | ./minigzip | ./minigzip -d || \ + echo ' *** minigzip test FAILED ***' ; \ + if ./example; then \ + echo ' *** zlib test OK ***'; \ + else \ + echo ' *** zlib test FAILED ***'; \ + fi + +libz.a: $(OBJS) $(OBJA) + $(AR) $@ $(OBJS) $(OBJA) + -@ ($(RANLIB) $@ || true) >/dev/null 2>&1 + +match.o: match.S + $(CPP) match.S > _match.s + $(CC) -c _match.s + mv _match.o match.o + rm -f _match.s + +$(SHAREDLIBV): $(OBJS) + $(LDSHARED) -o $@ $(OBJS) + rm -f $(SHAREDLIB) $(SHAREDLIBM) + ln -s $@ $(SHAREDLIB) + ln -s $@ $(SHAREDLIBM) + +example$(EXE): example.o $(LIBS) + $(CC) $(CFLAGS) -o $@ example.o $(LDFLAGS) + +minigzip$(EXE): minigzip.o $(LIBS) + $(CC) $(CFLAGS) -o $@ minigzip.o $(LDFLAGS) + +install: $(LIBS) + - at if [ ! -d $(exec_prefix) ]; then mkdir -p $(exec_prefix); fi + - at if [ ! -d $(includedir) ]; then mkdir -p $(includedir); fi + - at if [ ! -d $(libdir) ]; then mkdir -p $(libdir); fi + - at if [ ! 
-d $(man3dir) ]; then mkdir -p $(man3dir); fi + cp zlib.h zconf.h $(includedir) + chmod 644 $(includedir)/zlib.h $(includedir)/zconf.h + cp $(LIBS) $(libdir) + cd $(libdir); chmod 755 $(LIBS) + -@(cd $(libdir); $(RANLIB) libz.a || true) >/dev/null 2>&1 + cd $(libdir); if test -f $(SHAREDLIBV); then \ + rm -f $(SHAREDLIB) $(SHAREDLIBM); \ + ln -s $(SHAREDLIBV) $(SHAREDLIB); \ + ln -s $(SHAREDLIBV) $(SHAREDLIBM); \ + (ldconfig || true) >/dev/null 2>&1; \ + fi + cp zlib.3 $(man3dir) + chmod 644 $(man3dir)/zlib.3 +# The ranlib in install is needed on NeXTSTEP which checks file times +# ldconfig is for Linux + +uninstall: + cd $(includedir); \ + cd $(libdir); rm -f libz.a; \ + if test -f $(SHAREDLIBV); then \ + rm -f $(SHAREDLIBV) $(SHAREDLIB) $(SHAREDLIBM); \ + fi + cd $(man3dir); rm -f zlib.3 + +mostlyclean: clean +clean: + rm -f *.o *~ example$(EXE) minigzip$(EXE) \ + libz.* foo.gz so_locations \ + _match.s maketree contrib/infback9/*.o + +maintainer-clean: distclean +distclean: clean + cp -p Makefile.in Makefile + cp -p zconf.in.h zconf.h + rm -f .DS_Store + +tags: + etags *.[ch] + +depend: + makedepend -- $(CFLAGS) -- *.[ch] + +# DO NOT DELETE THIS LINE -- make depend depends on it. 
+ +adler32.o: zlib.h zconf.h +compress.o: zlib.h zconf.h +crc32.o: crc32.h zlib.h zconf.h +deflate.o: deflate.h zutil.h zlib.h zconf.h +example.o: zlib.h zconf.h +gzio.o: zutil.h zlib.h zconf.h +inffast.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +inflate.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +infback.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +inftrees.o: zutil.h zlib.h zconf.h inftrees.h +minigzip.o: zlib.h zconf.h +trees.o: deflate.h zutil.h zlib.h zconf.h trees.h +uncompr.o: zlib.h zconf.h +zutil.o: zutil.h zlib.h zconf.h Added: external/zlib/README ============================================================================== --- (empty file) +++ external/zlib/README Tue Jan 3 07:42:59 2006 @@ -0,0 +1,125 @@ +ZLIB DATA COMPRESSION LIBRARY + +zlib 1.2.3 is a general purpose data compression library. All the code is +thread safe. The data format used by the zlib library is described by RFCs +(Request for Comments) 1950 to 1952 in the files +http://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate format) +and rfc1952.txt (gzip format). These documents are also available in other +formats from ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html + +All functions of the compression library are documented in the file zlib.h +(volunteer to write man pages welcome, contact zlib at gzip.org). A usage example +of the library is given in the file example.c which also tests that the library +is working correctly. Another example is given in the file minigzip.c. The +compression library itself is composed of all source files except example.c and +minigzip.c. + +To compile all files and run the test program, follow the instructions given at +the top of Makefile. In short "make test; make install" should work for most +machines. For Unix: "./configure; make test; make install". For MSDOS, use one +of the special makefiles such as Makefile.msc. For VMS, use make_vms.com. 
+ +Questions about zlib should be sent to zlib at gzip.org, or to Gilles Vollant + for the Windows DLL version. The zlib home page is +http://www.zlib.org or http://www.gzip.org/zlib/ Before reporting a problem, +please check this site to verify that you have the latest version of zlib; +otherwise get the latest version and check whether the problem still exists or +not. + +PLEASE read the zlib FAQ http://www.gzip.org/zlib/zlib_faq.html before asking +for help. + +Mark Nelson wrote an article about zlib for the Jan. 1997 +issue of Dr. Dobb's Journal; a copy of the article is available in +http://dogma.net/markn/articles/zlibtool/zlibtool.htm + +The changes made in version 1.2.3 are documented in the file ChangeLog. + +Unsupported third party contributions are provided in directory "contrib". + +A Java implementation of zlib is available in the Java Development Kit +http://java.sun.com/j2se/1.4.2/docs/api/java/util/zip/package-summary.html +See the zlib home page http://www.zlib.org for details. + +A Perl interface to zlib written by Paul Marquess is in the +CPAN (Comprehensive Perl Archive Network) sites +http://www.cpan.org/modules/by-module/Compress/ + +A Python interface to zlib written by A.M. Kuchling is +available in Python 1.5 and later versions, see +http://www.python.org/doc/lib/module-zlib.html + +A zlib binding for TCL written by Andreas Kupries is +available at http://www.oche.de/~akupries/soft/trf/trf_zip.html + +An experimental package to read and write files in .zip format, written on top +of zlib by Gilles Vollant , is available in the +contrib/minizip directory of zlib. + + +Notes for some targets: + +- For Windows DLL versions, please see win32/DLL_FAQ.txt + +- For 64-bit Irix, deflate.c must be compiled without any optimization. With + -O, one libpng test fails. The test works in 32 bit mode (with the -n32 + compiler flag). The compiler bug has been reported to SGI. 
+ +- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1; it works + when compiled with cc. + +- On Digital Unix 4.0D (formerly OSF/1) on AlphaServer, the cc option -std1 is + necessary to get gzprintf working correctly. This is done by configure. + +- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with + other compilers. Use "make test" to check your compiler. + +- gzdopen is not supported on RISCOS, BEOS and by some Mac compilers. + +- For PalmOs, see http://palmzlib.sourceforge.net/ + +- When building a shared, i.e. dynamic library on Mac OS X, the library must be + installed before testing (do "make install" before "make test"), since the + library location is specified in the library. + + +Acknowledgments: + + The deflate format used by zlib was defined by Phil Katz. The deflate + and zlib specifications were written by L. Peter Deutsch. Thanks to all the + people who reported problems and suggested various improvements in zlib; + they are too numerous to cite here. + +Copyright notice: + + (C) 1995-2004 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Jean-loup Gailly Mark Adler + jloup at gzip.org madler at alumni.caltech.edu + +If you use the zlib library in a product, we would appreciate *not* +receiving lengthy legal documents to sign. The sources are provided +for free but without warranty of any kind. The library has been +entirely written by Jean-loup Gailly and Mark Adler; it does not +include third-party code. + +If you redistribute modified sources, we would appreciate that you include +in the file ChangeLog history information documenting your changes. Please +read the FAQ for more information on the distribution of modified source +versions. Added: external/zlib/adler32.c ============================================================================== --- (empty file) +++ external/zlib/adler32.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,149 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +#define BASE 65521UL /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* use NO_DIVIDE if your processor does not do division in hardware */ +#ifdef NO_DIVIDE +# define MOD(a) \ + do { \ + if (a >= (BASE << 16)) a -= (BASE << 16); \ + if (a >= (BASE << 15)) a -= (BASE << 15); \ + if (a >= (BASE << 14)) a -= (BASE << 14); \ + if (a >= (BASE << 13)) a -= (BASE << 13); \ + if (a >= (BASE << 12)) a -= (BASE << 12); \ + if (a >= (BASE << 11)) a -= (BASE << 11); \ + if (a >= (BASE << 10)) a -= (BASE << 10); \ + if (a >= (BASE << 9)) a -= (BASE << 9); \ + if (a >= (BASE << 8)) a -= (BASE << 8); \ + if (a >= 
(BASE << 7)) a -= (BASE << 7); \ + if (a >= (BASE << 6)) a -= (BASE << 6); \ + if (a >= (BASE << 5)) a -= (BASE << 5); \ + if (a >= (BASE << 4)) a -= (BASE << 4); \ + if (a >= (BASE << 3)) a -= (BASE << 3); \ + if (a >= (BASE << 2)) a -= (BASE << 2); \ + if (a >= (BASE << 1)) a -= (BASE << 1); \ + if (a >= BASE) a -= BASE; \ + } while (0) +# define MOD4(a) \ + do { \ + if (a >= (BASE << 4)) a -= (BASE << 4); \ + if (a >= (BASE << 3)) a -= (BASE << 3); \ + if (a >= (BASE << 2)) a -= (BASE << 2); \ + if (a >= (BASE << 1)) a -= (BASE << 1); \ + if (a >= BASE) a -= BASE; \ + } while (0) +#else +# define MOD(a) a %= BASE +# define MOD4(a) a %= BASE +#endif + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + unsigned long sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD4(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; + } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len >= 16) { + len -= 16; + DO16(buf); + buf += 
16; + } + while (len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off_t len2; +{ + unsigned long sum1; + unsigned long sum2; + unsigned rem; + + /* the derivation of this formula is left as an exercise for the reader */ + rem = (unsigned)(len2 % BASE); + sum1 = adler1 & 0xffff; + sum2 = rem * sum1; + MOD(sum2); + sum1 += (adler2 & 0xffff) + BASE - 1; + sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; + if (sum1 > BASE) sum1 -= BASE; + if (sum1 > BASE) sum1 -= BASE; + if (sum2 > (BASE << 1)) sum2 -= (BASE << 1); + if (sum2 > BASE) sum2 -= BASE; + return sum1 | (sum2 << 16); +} Added: external/zlib/algorithm.txt ============================================================================== --- (empty file) +++ external/zlib/algorithm.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,209 @@ +1. Compression algorithm (deflate) + +The deflation algorithm used by gzip (also zip and zlib) is a variation of +LZ77 (Lempel-Ziv 1977, see reference below). It finds duplicated strings in +the input data. The second occurrence of a string is replaced by a +pointer to the previous string, in the form of a pair (distance, +length). Distances are limited to 32K bytes, and lengths are limited +to 258 bytes. When a string does not occur anywhere in the previous +32K bytes, it is emitted as a sequence of literal bytes. (In this +description, `string' must be taken as an arbitrary sequence of bytes, +and is not restricted to printable characters.) + +Literals or match lengths are compressed with one Huffman tree, and +match distances are compressed with another tree. The trees are stored +in a compact form at the start of each block. 
The blocks can have any +size (except that the compressed data for one block must fit in +available memory). A block is terminated when deflate() determines that +it would be useful to start another block with fresh trees. (This is +somewhat similar to the behavior of LZW-based _compress_.) + +Duplicated strings are found using a hash table. All input strings of +length 3 are inserted in the hash table. A hash index is computed for +the next 3 bytes. If the hash chain for this index is not empty, all +strings in the chain are compared with the current input string, and +the longest match is selected. + +The hash chains are searched starting with the most recent strings, to +favor small distances and thus take advantage of the Huffman encoding. +The hash chains are singly linked. There are no deletions from the +hash chains, the algorithm simply discards matches that are too old. + +To avoid a worst-case situation, very long hash chains are arbitrarily +truncated at a certain length, determined by a runtime option (level +parameter of deflateInit). So deflate() does not always find the longest +possible match but generally finds a match which is long enough. + +deflate() also defers the selection of matches with a lazy evaluation +mechanism. After a match of length N has been found, deflate() searches for +a longer match at the next input byte. If a longer match is found, the +previous match is truncated to a length of one (thus producing a single +literal byte) and the process of lazy evaluation begins again. Otherwise, +the original match is kept, and the next match search is attempted only N +steps later. + +The lazy match evaluation is also subject to a runtime parameter. If +the current match is long enough, deflate() reduces the search for a longer +match, thus speeding up the whole process. If compression ratio is more +important than speed, deflate() attempts a complete second search even if +the first match is already long enough. 
+ +The lazy match evaluation is not performed for the fastest compression +modes (level parameter 1 to 3). For these fast modes, new strings +are inserted in the hash table only when no match was found, or +when the match is not too long. This degrades the compression ratio +but saves time since there are both fewer insertions and fewer searches. + + +2. Decompression algorithm (inflate) + +2.1 Introduction + +The key question is how to represent a Huffman code (or any prefix code) so +that you can decode fast. The most important characteristic is that shorter +codes are much more common than longer codes, so pay attention to decoding the +short codes fast, and let the long codes take longer to decode. + +inflate() sets up a first level table that covers some number of bits of +input less than the length of longest code. It gets that many bits from the +stream, and looks it up in the table. The table will tell if the next +code is that many bits or less and how many, and if it is, it will tell +the value, else it will point to the next level table for which inflate() +grabs more bits and tries to decode a longer code. + +How many bits to make the first lookup is a tradeoff between the time it +takes to decode and the time it takes to build the table. If building the +table took no time (and if you had infinite memory), then there would only +be a first level table to cover all the way to the longest code. However, +building the table ends up taking a lot longer for more bits since short +codes are replicated many times in such a table. What inflate() does is +simply to make the number of bits in the first table a variable, and then +to set that variable for the maximum speed. + +For inflate, which has 286 possible codes for the literal/length tree, the size +of the first table is nine bits. Also the distance trees have 30 possible +values, and the size of the first table is six bits. 
Note that for each of +those cases, the table ended up one bit longer than the ``average'' code +length, i.e. the code length of an approximately flat code which would be a +little more than eight bits for 286 symbols and a little less than five bits +for 30 symbols. + + +2.2 More details on the inflate table lookup + +Ok, you want to know what this cleverly obfuscated inflate tree actually +looks like. You are correct that it's not a Huffman tree. It is simply a +lookup table for the first, let's say, nine bits of a Huffman symbol. The +symbol could be as short as one bit or as long as 15 bits. If a particular +symbol is shorter than nine bits, then that symbol's translation is duplicated +in all those entries that start with that symbol's bits. For example, if the +symbol is four bits, then it's duplicated 32 times in a nine-bit table. If a +symbol is nine bits long, it appears in the table once. + +If the symbol is longer than nine bits, then that entry in the table points +to another similar table for the remaining bits. Again, there are duplicated +entries as needed. The idea is that most of the time the symbol will be short +and there will only be one table look up. (That's the whole idea behind data +compression in the first place.) For the less frequent long symbols, there +will be two lookups. If you had a compression method with really long +symbols, you could have as many levels of lookups as is efficient. For +inflate, two is enough. + +So a table entry either points to another table (in which case nine bits in +the above example are gobbled), or it contains the translation for the symbol +and the number of bits to gobble. Then you start again with the next +ungobbled bit. + +You may wonder: why not just have one lookup table for how ever many bits the +longest symbol is? The reason is that if you do that, you end up spending +more time filling in duplicate symbol entries than you do actually decoding. 
+At least for deflate's output that generates new trees every several 10's of +kbytes. You can imagine that filling in a 2^15 entry table for a 15-bit code +would take too long if you're only decoding several thousand symbols. At the +other extreme, you could make a new table for every bit in the code. In fact, +that's essentially a Huffman tree. But then you spend too much time +traversing the tree while decoding, even for short symbols. + +So the number of bits for the first lookup table is a tradeoff of the time to +fill out the table vs. the time spent looking at the second level and above of +the table. + +Here is an example, scaled down: + +The code being decoded, with 10 symbols, from 1 to 6 bits long: + +A: 0 +B: 10 +C: 1100 +D: 11010 +E: 11011 +F: 11100 +G: 11101 +H: 11110 +I: 111110 +J: 111111 + +Let's make the first table three bits long (eight entries): + +000: A,1 +001: A,1 +010: A,1 +011: A,1 +100: B,2 +101: B,2 +110: -> table X (gobble 3 bits) +111: -> table Y (gobble 3 bits) + +Each entry is what the bits decode as and how many bits that is, i.e. how +many bits to gobble. Or the entry points to another table, with the number of +bits to gobble implicit in the size of the table. + +Table X is two bits long since the longest code starting with 110 is five bits +long: + +00: C,1 +01: C,1 +10: D,2 +11: E,2 + +Table Y is three bits long since the longest code starting with 111 is six +bits long: + +000: F,2 +001: F,2 +010: G,2 +011: G,2 +100: H,2 +101: H,2 +110: I,3 +111: J,3 + +So what we have here are three tables with a total of 20 entries that had to +be constructed. That's compared to 64 entries for a single table. Or +compared to 16 entries for a Huffman tree (six two entry tables and one four +entry table). Assuming that the code ideally represents the probability of +the symbols, it takes on the average 1.25 lookups per symbol. That's compared +to one lookup for the single table, or 1.66 lookups per symbol for the +Huffman tree. 
+ +There, I think that gives you a picture of what's going on. For inflate, the +meaning of a particular symbol is often more than just a letter. It can be a +byte (a "literal"), or it can be either a length or a distance which +indicates a base value and a number of bits to fetch after the code that is +added to the base value. Or it might be the special end-of-block code. The +data structures created in inftrees.c try to encode all that information +compactly in the tables. + + +Jean-loup Gailly Mark Adler +jloup at gzip.org madler at alumni.caltech.edu + + +References: + +[LZ77] Ziv J., Lempel A., ``A Universal Algorithm for Sequential Data +Compression,'' IEEE Transactions on Information Theory, Vol. 23, No. 3, +pp. 337-343. + +``DEFLATE Compressed Data Format Specification'' available in +http://www.ietf.org/rfc/rfc1951.txt Added: external/zlib/amiga/Makefile.pup ============================================================================== --- (empty file) +++ external/zlib/amiga/Makefile.pup Tue Jan 3 07:42:59 2006 @@ -0,0 +1,66 @@ +# Amiga powerUP (TM) Makefile +# makefile for libpng and SAS C V6.58/7.00 PPC compiler +# Copyright (C) 1998 by Andreas R. 
Kleinert + +LIBNAME = libzip.a + +CC = scppc +CFLAGS = NOSTKCHK NOSINT OPTIMIZE OPTGO OPTPEEP OPTINLOCAL OPTINL \ + OPTLOOP OPTRDEP=8 OPTDEP=8 OPTCOMP=8 NOVER +AR = ppc-amigaos-ar cr +RANLIB = ppc-amigaos-ranlib +LD = ppc-amigaos-ld -r +LDFLAGS = -o +LDLIBS = LIB:scppc.a LIB:end.o +RM = delete quiet + +OBJS = adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \ + zutil.o inflate.o infback.o inftrees.o inffast.o + +TEST_OBJS = example.o minigzip.o + +all: example minigzip + +check: test +test: all + example + echo hello world | minigzip | minigzip -d + +$(LIBNAME): $(OBJS) + $(AR) $@ $(OBJS) + -$(RANLIB) $@ + +example: example.o $(LIBNAME) + $(LD) $(LDFLAGS) $@ LIB:c_ppc.o $@.o $(LIBNAME) $(LDLIBS) + +minigzip: minigzip.o $(LIBNAME) + $(LD) $(LDFLAGS) $@ LIB:c_ppc.o $@.o $(LIBNAME) $(LDLIBS) + +mostlyclean: clean +clean: + $(RM) *.o example minigzip $(LIBNAME) foo.gz + +zip: + zip -ul9 zlib README ChangeLog Makefile Make????.??? Makefile.?? \ + descrip.mms *.[ch] + +tgz: + cd ..; tar cfz zlib/zlib.tgz zlib/README zlib/ChangeLog zlib/Makefile \ + zlib/Make????.??? zlib/Makefile.?? zlib/descrip.mms zlib/*.[ch] + +# DO NOT DELETE THIS LINE -- make depend depends on it. 
+ +adler32.o: zlib.h zconf.h +compress.o: zlib.h zconf.h +crc32.o: crc32.h zlib.h zconf.h +deflate.o: deflate.h zutil.h zlib.h zconf.h +example.o: zlib.h zconf.h +gzio.o: zutil.h zlib.h zconf.h +inffast.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +inflate.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +infback.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +inftrees.o: zutil.h zlib.h zconf.h inftrees.h +minigzip.o: zlib.h zconf.h +trees.o: deflate.h zutil.h zlib.h zconf.h trees.h +uncompr.o: zlib.h zconf.h +zutil.o: zutil.h zlib.h zconf.h Added: external/zlib/amiga/Makefile.sas ============================================================================== --- (empty file) +++ external/zlib/amiga/Makefile.sas Tue Jan 3 07:42:59 2006 @@ -0,0 +1,65 @@ +# SMakefile for zlib +# Modified from the standard UNIX Makefile Copyright Jean-loup Gailly +# Osma Ahvenlampi +# Amiga, SAS/C 6.56 & Smake + +CC=sc +CFLAGS=OPT +#CFLAGS=OPT CPU=68030 +#CFLAGS=DEBUG=LINE +LDFLAGS=LIB z.lib + +SCOPTIONS=OPTSCHED OPTINLINE OPTALIAS OPTTIME OPTINLOCAL STRMERGE \ + NOICONS PARMS=BOTH NOSTACKCHECK UTILLIB NOVERSION ERRORREXX \ + DEF=POSTINC + +OBJS = adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \ + zutil.o inflate.o infback.o inftrees.o inffast.o + +TEST_OBJS = example.o minigzip.o + +all: SCOPTIONS example minigzip + +check: test +test: all + example + echo hello world | minigzip | minigzip -d + +install: z.lib + copy clone zlib.h zconf.h INCLUDE: + copy clone z.lib LIB: + +z.lib: $(OBJS) + oml z.lib r $(OBJS) + +example: example.o z.lib + $(CC) $(CFLAGS) LINK TO $@ example.o $(LDFLAGS) + +minigzip: minigzip.o z.lib + $(CC) $(CFLAGS) LINK TO $@ minigzip.o $(LDFLAGS) + +mostlyclean: clean +clean: + -delete force quiet example minigzip *.o z.lib foo.gz *.lnk SCOPTIONS + +SCOPTIONS: Makefile.sas + copy to $@ 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; +#endif + stream.next_out = dest; + 
stream.avail_out = (uInt)*destLen; + if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, level); + if (err != Z_OK) return err; + + err = deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&stream); + return err == Z_OK ? Z_BUF_ERROR : err; + } + *destLen = stream.total_out; + + err = deflateEnd(&stream); + return err; +} + +/* =========================================================================== + */ +int ZEXPORT compress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); +} + +/* =========================================================================== + If the default memLevel or windowBits for deflateInit() is changed, then + this function needs to be updated. + */ +uLong ZEXPORT compressBound (sourceLen) + uLong sourceLen; +{ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + 11; +} Added: external/zlib/configure ============================================================================== --- (empty file) +++ external/zlib/configure Tue Jan 3 07:42:59 2006 @@ -0,0 +1,459 @@ +#!/bin/sh +# configure script for zlib. This script is needed only if +# you wish to build a shared library and your system supports them, +# of if you need special compiler, flags or install directory. +# Otherwise, you can just use directly "make test; make install" +# +# To create a shared library, use "configure --shared"; by default a static +# library is created. 
If the primitive shared library support provided here +# does not work, use ftp://prep.ai.mit.edu/pub/gnu/libtool-*.tar.gz +# +# To impose specific compiler or flags or install directory, use for example: +# prefix=$HOME CC=cc CFLAGS="-O4" ./configure +# or for csh/tcsh users: +# (setenv prefix $HOME; setenv CC cc; setenv CFLAGS "-O4"; ./configure) +# LDSHARED is the command to be used to create a shared library + +# Incorrect settings of CC or CFLAGS may prevent creating a shared library. +# If you have problems, try without defining CC and CFLAGS before reporting +# an error. + +LIBS=libz.a +LDFLAGS="-L. ${LIBS}" +VER=`sed -n -e '/VERSION "/s/.*"\(.*\)".*/\1/p' < zlib.h` +VER2=`sed -n -e '/VERSION "/s/.*"\([0-9]*\\.[0-9]*\)\\..*/\1/p' < zlib.h` +VER1=`sed -n -e '/VERSION "/s/.*"\([0-9]*\)\\..*/\1/p' < zlib.h` +AR=${AR-"ar rc"} +RANLIB=${RANLIB-"ranlib"} +prefix=${prefix-/usr/local} +exec_prefix=${exec_prefix-'${prefix}'} +libdir=${libdir-'${exec_prefix}/lib'} +includedir=${includedir-'${prefix}/include'} +mandir=${mandir-'${prefix}/share/man'} +shared_ext='.so' +shared=0 +gcc=0 +old_cc="$CC" +old_cflags="$CFLAGS" + +while test $# -ge 1 +do +case "$1" in + -h* | --h*) + echo 'usage:' + echo ' configure [--shared] [--prefix=PREFIX] [--exec_prefix=EXPREFIX]' + echo ' [--libdir=LIBDIR] [--includedir=INCLUDEDIR]' + exit 0;; + -p*=* | --p*=*) prefix=`echo $1 | sed 's/[-a-z_]*=//'`; shift;; + -e*=* | --e*=*) exec_prefix=`echo $1 | sed 's/[-a-z_]*=//'`; shift;; + -l*=* | --libdir=*) libdir=`echo $1 | sed 's/[-a-z_]*=//'`; shift;; + -i*=* | --includedir=*) includedir=`echo $1 | sed 's/[-a-z_]*=//'`;shift;; + -p* | --p*) prefix="$2"; shift; shift;; + -e* | --e*) exec_prefix="$2"; shift; shift;; + -l* | --l*) libdir="$2"; shift; shift;; + -i* | --i*) includedir="$2"; shift; shift;; + -s* | --s*) shared=1; shift;; + *) echo "unknown option: $1"; echo "$0 --help for help"; exit 1;; + esac +done + +test=ztest$$ +cat > $test.c </dev/null; then + CC="$cc" + 
SFLAGS=${CFLAGS-"-fPIC -O3"} + CFLAGS="$cflags" + case `(uname -s || echo unknown) 2>/dev/null` in + Linux | linux | GNU | GNU/*) LDSHARED=${LDSHARED-"$cc -shared -Wl,-soname,libz.so.1"};; + CYGWIN* | Cygwin* | cygwin* | OS/2* ) + EXE='.exe';; + QNX*) # This is for QNX6. I suppose that the QNX rule below is for QNX2,QNX4 + # (alain.bonnefoy at icbt.com) + LDSHARED=${LDSHARED-"$cc -shared -Wl,-hlibz.so.1"};; + HP-UX*) + LDSHARED=${LDSHARED-"$cc -shared $SFLAGS"} + case `(uname -m || echo unknown) 2>/dev/null` in + ia64) + shared_ext='.so' + SHAREDLIB='libz.so';; + *) + shared_ext='.sl' + SHAREDLIB='libz.sl';; + esac;; + Darwin*) shared_ext='.dylib' + SHAREDLIB=libz$shared_ext + SHAREDLIBV=libz.$VER$shared_ext + SHAREDLIBM=libz.$VER1$shared_ext + LDSHARED=${LDSHARED-"$cc -dynamiclib -install_name $libdir/$SHAREDLIBM -compatibility_version $VER1 -current_version $VER"};; + *) LDSHARED=${LDSHARED-"$cc -shared"};; + esac +else + # find system name and corresponding cc options + CC=${CC-cc} + case `(uname -sr || echo unknown) 2>/dev/null` in + HP-UX*) SFLAGS=${CFLAGS-"-O +z"} + CFLAGS=${CFLAGS-"-O"} +# LDSHARED=${LDSHARED-"ld -b +vnocompatwarnings"} + LDSHARED=${LDSHARED-"ld -b"} + case `(uname -m || echo unknown) 2>/dev/null` in + ia64) + shared_ext='.so' + SHAREDLIB='libz.so';; + *) + shared_ext='.sl' + SHAREDLIB='libz.sl';; + esac;; + IRIX*) SFLAGS=${CFLAGS-"-ansi -O2 -rpath ."} + CFLAGS=${CFLAGS-"-ansi -O2"} + LDSHARED=${LDSHARED-"cc -shared"};; + OSF1\ V4*) SFLAGS=${CFLAGS-"-O -std1"} + CFLAGS=${CFLAGS-"-O -std1"} + LDSHARED=${LDSHARED-"cc -shared -Wl,-soname,libz.so -Wl,-msym -Wl,-rpath,$(libdir) -Wl,-set_version,${VER}:1.0"};; + OSF1*) SFLAGS=${CFLAGS-"-O -std1"} + CFLAGS=${CFLAGS-"-O -std1"} + LDSHARED=${LDSHARED-"cc -shared"};; + QNX*) SFLAGS=${CFLAGS-"-4 -O"} + CFLAGS=${CFLAGS-"-4 -O"} + LDSHARED=${LDSHARED-"cc"} + RANLIB=${RANLIB-"true"} + AR="cc -A";; + SCO_SV\ 3.2*) SFLAGS=${CFLAGS-"-O3 -dy -KPIC "} + CFLAGS=${CFLAGS-"-O3"} + LDSHARED=${LDSHARED-"cc -dy 
-KPIC -G"};; + SunOS\ 5*) SFLAGS=${CFLAGS-"-fast -xcg89 -KPIC -R."} + CFLAGS=${CFLAGS-"-fast -xcg89"} + LDSHARED=${LDSHARED-"cc -G"};; + SunOS\ 4*) SFLAGS=${CFLAGS-"-O2 -PIC"} + CFLAGS=${CFLAGS-"-O2"} + LDSHARED=${LDSHARED-"ld"};; + SunStudio\ 9*) SFLAGS=${CFLAGS-"-DUSE_MMAP -fast -xcode=pic32 -xtarget=ultra3 -xarch=v9b"} + CFLAGS=${CFLAGS-"-DUSE_MMAP -fast -xtarget=ultra3 -xarch=v9b"} + LDSHARED=${LDSHARED-"cc -xarch=v9b"};; + UNIX_System_V\ 4.2.0) + SFLAGS=${CFLAGS-"-KPIC -O"} + CFLAGS=${CFLAGS-"-O"} + LDSHARED=${LDSHARED-"cc -G"};; + UNIX_SV\ 4.2MP) + SFLAGS=${CFLAGS-"-Kconform_pic -O"} + CFLAGS=${CFLAGS-"-O"} + LDSHARED=${LDSHARED-"cc -G"};; + OpenUNIX\ 5) + SFLAGS=${CFLAGS-"-KPIC -O"} + CFLAGS=${CFLAGS-"-O"} + LDSHARED=${LDSHARED-"cc -G"};; + AIX*) # Courtesy of dbakker at arrayasolutions.com + SFLAGS=${CFLAGS-"-O -qmaxmem=8192"} + CFLAGS=${CFLAGS-"-O -qmaxmem=8192"} + LDSHARED=${LDSHARED-"xlc -G"};; + # send working options for other systems to support at gzip.org + *) SFLAGS=${CFLAGS-"-O"} + CFLAGS=${CFLAGS-"-O"} + LDSHARED=${LDSHARED-"cc -shared"};; + esac +fi + +SHAREDLIB=${SHAREDLIB-"libz$shared_ext"} +SHAREDLIBV=${SHAREDLIBV-"libz$shared_ext.$VER"} +SHAREDLIBM=${SHAREDLIBM-"libz$shared_ext.$VER1"} + +if test $shared -eq 1; then + echo Checking for shared library support... + # we must test in two steps (cc then ld), required at least on SunOS 4.x + if test "`($CC -c $SFLAGS $test.c) 2>&1`" = "" && + test "`($LDSHARED -o $test$shared_ext $test.o) 2>&1`" = ""; then + CFLAGS="$SFLAGS" + LIBS="$SHAREDLIBV" + echo Building shared library $SHAREDLIBV with $CC. + elif test -z "$old_cc" -a -z "$old_cflags"; then + echo No shared library support. + shared=0; + else + echo 'No shared library support; try without defining CC and CFLAGS' + shared=0; + fi +fi +if test $shared -eq 0; then + LDSHARED="$CC" + echo Building static library $LIBS version $VER with $CC. +else + LDFLAGS="-L. 
${SHAREDLIBV}" +fi + +cat > $test.c < +int main() { return 0; } +EOF +if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then + sed < zconf.in.h "/HAVE_UNISTD_H/s%0%1%" > zconf.h + echo "Checking for unistd.h... Yes." +else + cp -p zconf.in.h zconf.h + echo "Checking for unistd.h... No." +fi + +cat > $test.c < +#include +#include "zconf.h" + +int main() +{ +#ifndef STDC + choke me +#endif + + return 0; +} +EOF + +if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then + echo "Checking whether to use vs[n]printf() or s[n]printf()... using vs[n]printf()" + + cat > $test.c < +#include + +int mytest(char *fmt, ...) +{ + char buf[20]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + return 0; +} + +int main() +{ + return (mytest("Hello%d\n", 1)); +} +EOF + + if test "`($CC $CFLAGS -o $test $test.c) 2>&1`" = ""; then + echo "Checking for vsnprintf() in stdio.h... Yes." + + cat >$test.c < +#include + +int mytest(char *fmt, ...) +{ + int n; + char buf[20]; + va_list ap; + + va_start(ap, fmt); + n = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + return n; +} + +int main() +{ + return (mytest("Hello%d\n", 1)); +} +EOF + + if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then + echo "Checking for return value of vsnprintf()... Yes." + else + CFLAGS="$CFLAGS -DHAS_vsnprintf_void" + echo "Checking for return value of vsnprintf()... No." + echo " WARNING: apparently vsnprintf() does not return a value. zlib" + echo " can build but will be open to possible string-format security" + echo " vulnerabilities." + fi + else + CFLAGS="$CFLAGS -DNO_vsnprintf" + echo "Checking for vsnprintf() in stdio.h... No." + echo " WARNING: vsnprintf() not found, falling back to vsprintf(). zlib" + echo " can build but will be open to possible buffer-overflow security" + echo " vulnerabilities." + + cat >$test.c < +#include + +int mytest(char *fmt, ...) 
+{ + int n; + char buf[20]; + va_list ap; + + va_start(ap, fmt); + n = vsprintf(buf, fmt, ap); + va_end(ap); + return n; +} + +int main() +{ + return (mytest("Hello%d\n", 1)); +} +EOF + + if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then + echo "Checking for return value of vsprintf()... Yes." + else + CFLAGS="$CFLAGS -DHAS_vsprintf_void" + echo "Checking for return value of vsprintf()... No." + echo " WARNING: apparently vsprintf() does not return a value. zlib" + echo " can build but will be open to possible string-format security" + echo " vulnerabilities." + fi + fi +else + echo "Checking whether to use vs[n]printf() or s[n]printf()... using s[n]printf()" + + cat >$test.c < + +int mytest() +{ + char buf[20]; + + snprintf(buf, sizeof(buf), "%s", "foo"); + return 0; +} + +int main() +{ + return (mytest()); +} +EOF + + if test "`($CC $CFLAGS -o $test $test.c) 2>&1`" = ""; then + echo "Checking for snprintf() in stdio.h... Yes." + + cat >$test.c < + +int mytest() +{ + char buf[20]; + + return snprintf(buf, sizeof(buf), "%s", "foo"); +} + +int main() +{ + return (mytest()); +} +EOF + + if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then + echo "Checking for return value of snprintf()... Yes." + else + CFLAGS="$CFLAGS -DHAS_snprintf_void" + echo "Checking for return value of snprintf()... No." + echo " WARNING: apparently snprintf() does not return a value. zlib" + echo " can build but will be open to possible string-format security" + echo " vulnerabilities." + fi + else + CFLAGS="$CFLAGS -DNO_snprintf" + echo "Checking for snprintf() in stdio.h... No." + echo " WARNING: snprintf() not found, falling back to sprintf(). zlib" + echo " can build but will be open to possible buffer-overflow security" + echo " vulnerabilities." 
+ + cat >$test.c < + +int mytest() +{ + char buf[20]; + + return sprintf(buf, "%s", "foo"); +} + +int main() +{ + return (mytest()); +} +EOF + + if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then + echo "Checking for return value of sprintf()... Yes." + else + CFLAGS="$CFLAGS -DHAS_sprintf_void" + echo "Checking for return value of sprintf()... No." + echo " WARNING: apparently sprintf() does not return a value. zlib" + echo " can build but will be open to possible string-format security" + echo " vulnerabilities." + fi + fi +fi + +cat >$test.c < +int main() { return 0; } +EOF +if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then + echo "Checking for errno.h... Yes." +else + echo "Checking for errno.h... No." + CFLAGS="$CFLAGS -DNO_ERRNO_H" +fi + +cat > $test.c < +#include +#include +caddr_t hello() { + return mmap((caddr_t)0, (off_t)0, PROT_READ, MAP_SHARED, 0, (off_t)0); +} +EOF +if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then + CFLAGS="$CFLAGS -DUSE_MMAP" + echo Checking for mmap support... Yes. +else + echo Checking for mmap support... No. +fi + +CPP=${CPP-"$CC -E"} +case $CFLAGS in + *ASMV*) + if test "`nm $test.o | grep _hello`" = ""; then + CPP="$CPP -DNO_UNDERLINE" + echo Checking for underline in external names... No. + else + echo Checking for underline in external names... Yes. 
+ fi;; +esac + +rm -f $test.[co] $test $test$shared_ext + +# update Makefile +sed < Makefile.in " +/^CC *=/s#=.*#=$CC# +/^CFLAGS *=/s#=.*#=$CFLAGS# +/^CPP *=/s#=.*#=$CPP# +/^LDSHARED *=/s#=.*#=$LDSHARED# +/^LIBS *=/s#=.*#=$LIBS# +/^SHAREDLIB *=/s#=.*#=$SHAREDLIB# +/^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV# +/^SHAREDLIBM *=/s#=.*#=$SHAREDLIBM# +/^AR *=/s#=.*#=$AR# +/^RANLIB *=/s#=.*#=$RANLIB# +/^EXE *=/s#=.*#=$EXE# +/^prefix *=/s#=.*#=$prefix# +/^exec_prefix *=/s#=.*#=$exec_prefix# +/^libdir *=/s#=.*#=$libdir# +/^includedir *=/s#=.*#=$includedir# +/^mandir *=/s#=.*#=$mandir# +/^LDFLAGS *=/s#=.*#=$LDFLAGS# +" > Makefile Added: external/zlib/contrib/README.contrib ============================================================================== --- (empty file) +++ external/zlib/contrib/README.contrib Tue Jan 3 07:42:59 2006 @@ -0,0 +1,71 @@ +All files under this contrib directory are UNSUPPORTED. These were +provided by users of zlib and were not tested by the authors of zlib. +Use at your own risk. Please contact the authors of the contributions +for help about these, not the zlib authors. Thanks. 
+ + +ada/ by Dmitriy Anisimkov + Support for Ada + See http://zlib-ada.sourceforge.net/ + +asm586/ +asm686/ by Brian Raiter + asm code for Pentium and PPro/PII, using the AT&T (GNU as) syntax + See http://www.muppetlabs.com/~breadbox/software/assembly.html + +blast/ by Mark Adler + Decompressor for output of PKWare Data Compression Library (DCL) + +delphi/ by Cosmin Truta + Support for Delphi and C++ Builder + +dotzlib/ by Henrik Ravn + Support for Microsoft .Net and Visual C++ .Net + +infback9/ by Mark Adler + Unsupported diffs to infback to decode the deflate64 format + +inflate86/ by Chris Anderson + Tuned x86 gcc asm code to replace inflate_fast() + +iostream/ by Kevin Ruland + A C++ I/O streams interface to the zlib gz* functions + +iostream2/ by Tyge Løvset + Another C++ I/O streams interface + +iostream3/ by Ludwig Schwardt + and Kevin Ruland + Yet another C++ I/O streams interface + +masm686/ by Dan Higdon + and Chuck Walbourn + asm code for Pentium Pro/PII, using the MASM syntax + +masmx64/ by Gilles Vollant + x86 64-bit (AMD64 and Intel EM64t) code for x64 assembler to + replace longest_match() and inflate_fast() + +masmx86/ by Gilles Vollant + x86 asm code to replace longest_match() and inflate_fast(), + for Visual C++ and MASM + +minizip/ by Gilles Vollant + Mini zip and unzip based on zlib + See http://www.winimage.com/zLibDll/unzip.html + +pascal/ by Bob Dellaca et al. + Support for Pascal + +puff/ by Mark Adler + Small, low memory usage inflate. Also serves to provide an + unambiguous description of the deflate format. + +testzlib/ by Gilles Vollant + Example of the use of zlib + +untgz/ by Pedro A. 
Aranda Gutierrez + A very simple tar.gz file extractor using zlib + +vstudio/ by Gilles Vollant + Building a minizip-enhanced zlib with Microsoft Visual Studio Added: external/zlib/contrib/ada/buffer_demo.adb ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/buffer_demo.adb Tue Jan 3 07:42:59 2006 @@ -0,0 +1,106 @@ +---------------------------------------------------------------- +-- ZLib for Ada thick binding. -- +-- -- +-- Copyright (C) 2002-2004 Dmitriy Anisimkov -- +-- -- +-- Open source license information is in the zlib.ads file. -- +---------------------------------------------------------------- +-- +-- $Id: buffer_demo.adb,v 1.3 2004/09/06 06:55:35 vagul Exp $ + +-- This demo program provided by Dr Steve Sangwine +-- +-- Demonstration of a problem with Zlib-Ada (already fixed) when a buffer +-- of exactly the correct size is used for decompressed data, and the last +-- few bytes passed in to Zlib are checksum bytes. + +-- This program compresses a string of text, and then decompresses the +-- compressed text into a buffer of the same size as the original text. + +with Ada.Streams; use Ada.Streams; +with Ada.Text_IO; + +with ZLib; use ZLib; + +procedure Buffer_Demo is + EOL : Character renames ASCII.LF; + Text : constant String + := "Four score and seven years ago our fathers brought forth," & EOL & + "upon this continent, a new nation, conceived in liberty," & EOL & + "and dedicated to the proposition that `all men are created equal'."; + + Source : Stream_Element_Array (1 .. Text'Length); + for Source'Address use Text'Address; + +begin + Ada.Text_IO.Put (Text); + Ada.Text_IO.New_Line; + Ada.Text_IO.Put_Line + ("Uncompressed size : " & Positive'Image (Text'Length) & " bytes"); + + declare + Compressed_Data : Stream_Element_Array (1 .. 
Text'Length); + L : Stream_Element_Offset; + begin + Compress : declare + Compressor : Filter_Type; + I : Stream_Element_Offset; + begin + Deflate_Init (Compressor); + + -- Compress the whole of T at once. + + Translate (Compressor, Source, I, Compressed_Data, L, Finish); + pragma Assert (I = Source'Last); + + Close (Compressor); + + Ada.Text_IO.Put_Line + ("Compressed size : " + & Stream_Element_Offset'Image (L) & " bytes"); + end Compress; + + -- Now we decompress the data, passing short blocks of data to Zlib + -- (because this demonstrates the problem - the last block passed will + -- contain checksum information and there will be no output, only a + -- check inside Zlib that the checksum is correct). + + Decompress : declare + Decompressor : Filter_Type; + + Uncompressed_Data : Stream_Element_Array (1 .. Text'Length); + + Block_Size : constant := 4; + -- This makes sure that the last block contains + -- only Adler checksum data. + + P : Stream_Element_Offset := Compressed_Data'First - 1; + O : Stream_Element_Offset; + begin + Inflate_Init (Decompressor); + + loop + Translate + (Decompressor, + Compressed_Data + (P + 1 .. Stream_Element_Offset'Min (P + Block_Size, L)), + P, + Uncompressed_Data + (Total_Out (Decompressor) + 1 .. Uncompressed_Data'Last), + O, + No_Flush); + + Ada.Text_IO.Put_Line + ("Total in : " & Count'Image (Total_In (Decompressor)) & + ", out : " & Count'Image (Total_Out (Decompressor))); + + exit when P = L; + end loop; + + Ada.Text_IO.New_Line; + Ada.Text_IO.Put_Line + ("Decompressed text matches original text : " + & Boolean'Image (Uncompressed_Data = Source)); + end Decompress; + end; +end Buffer_Demo; Added: external/zlib/contrib/ada/mtest.adb ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/mtest.adb Tue Jan 3 07:42:59 2006 @@ -0,0 +1,156 @@ +---------------------------------------------------------------- +-- ZLib for Ada thick binding. 
-- +-- -- +-- Copyright (C) 2002-2003 Dmitriy Anisimkov -- +-- -- +-- Open source license information is in the zlib.ads file. -- +---------------------------------------------------------------- +-- Continuous test for ZLib multithreading. If the test would fail +-- we should provide thread safe allocation routines for the Z_Stream. +-- +-- $Id: mtest.adb,v 1.4 2004/07/23 07:49:54 vagul Exp $ + +with ZLib; +with Ada.Streams; +with Ada.Numerics.Discrete_Random; +with Ada.Text_IO; +with Ada.Exceptions; +with Ada.Task_Identification; + +procedure MTest is + use Ada.Streams; + use ZLib; + + Stop : Boolean := False; + + pragma Atomic (Stop); + + subtype Visible_Symbols is Stream_Element range 16#20# .. 16#7E#; + + package Random_Elements is + new Ada.Numerics.Discrete_Random (Visible_Symbols); + + task type Test_Task; + + task body Test_Task is + Buffer : Stream_Element_Array (1 .. 100_000); + Gen : Random_Elements.Generator; + + Buffer_First : Stream_Element_Offset; + Compare_First : Stream_Element_Offset; + + Deflate : Filter_Type; + Inflate : Filter_Type; + + procedure Further (Item : in Stream_Element_Array); + + procedure Read_Buffer + (Item : out Ada.Streams.Stream_Element_Array; + Last : out Ada.Streams.Stream_Element_Offset); + + ------------- + -- Further -- + ------------- + + procedure Further (Item : in Stream_Element_Array) is + + procedure Compare (Item : in Stream_Element_Array); + + ------------- + -- Compare -- + ------------- + + procedure Compare (Item : in Stream_Element_Array) is + Next_First : Stream_Element_Offset := Compare_First + Item'Length; + begin + if Buffer (Compare_First .. 
Next_First - 1) /= Item then + raise Program_Error; + end if; + + Compare_First := Next_First; + end Compare; + + procedure Compare_Write is new ZLib.Write (Write => Compare); + begin + Compare_Write (Inflate, Item, No_Flush); + end Further; + + ----------------- + -- Read_Buffer -- + ----------------- + + procedure Read_Buffer + (Item : out Ada.Streams.Stream_Element_Array; + Last : out Ada.Streams.Stream_Element_Offset) + is + Buff_Diff : Stream_Element_Offset := Buffer'Last - Buffer_First; + Next_First : Stream_Element_Offset; + begin + if Item'Length <= Buff_Diff then + Last := Item'Last; + + Next_First := Buffer_First + Item'Length; + + Item := Buffer (Buffer_First .. Next_First - 1); + + Buffer_First := Next_First; + else + Last := Item'First + Buff_Diff; + Item (Item'First .. Last) := Buffer (Buffer_First .. Buffer'Last); + Buffer_First := Buffer'Last + 1; + end if; + end Read_Buffer; + + procedure Translate is new Generic_Translate + (Data_In => Read_Buffer, + Data_Out => Further); + + begin + Random_Elements.Reset (Gen); + + Buffer := (others => 20); + + Main : loop + for J in Buffer'Range loop + Buffer (J) := Random_Elements.Random (Gen); + + Deflate_Init (Deflate); + Inflate_Init (Inflate); + + Buffer_First := Buffer'First; + Compare_First := Buffer'First; + + Translate (Deflate); + + if Compare_First /= Buffer'Last + 1 then + raise Program_Error; + end if; + + Ada.Text_IO.Put_Line + (Ada.Task_Identification.Image + (Ada.Task_Identification.Current_Task) + & Stream_Element_Offset'Image (J) + & ZLib.Count'Image (Total_Out (Deflate))); + + Close (Deflate); + Close (Inflate); + + exit Main when Stop; + end loop; + end loop Main; + exception + when E : others => + Ada.Text_IO.Put_Line (Ada.Exceptions.Exception_Information (E)); + Stop := True; + end Test_Task; + + Test : array (1 .. 
4) of Test_Task; + + pragma Unreferenced (Test); + + Dummy : Character; + +begin + Ada.Text_IO.Get_Immediate (Dummy); + Stop := True; +end MTest; Added: external/zlib/contrib/ada/read.adb ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/read.adb Tue Jan 3 07:42:59 2006 @@ -0,0 +1,156 @@ +---------------------------------------------------------------- +-- ZLib for Ada thick binding. -- +-- -- +-- Copyright (C) 2002-2003 Dmitriy Anisimkov -- +-- -- +-- Open source license information is in the zlib.ads file. -- +---------------------------------------------------------------- + +-- $Id: read.adb,v 1.8 2004/05/31 10:53:40 vagul Exp $ + +-- Test/demo program for the generic read interface. + +with Ada.Numerics.Discrete_Random; +with Ada.Streams; +with Ada.Text_IO; + +with ZLib; + +procedure Read is + + use Ada.Streams; + + ------------------------------------ + -- Test configuration parameters -- + ------------------------------------ + + File_Size : Stream_Element_Offset := 100_000; + + Continuous : constant Boolean := False; + -- If this constant is True, the test would be repeated again and again, + -- with increment File_Size for every iteration. + + Header : constant ZLib.Header_Type := ZLib.Default; + -- Do not use Header other than Default in ZLib versions 1.1.4 and older. + + Init_Random : constant := 8; + -- We are using the same random sequence, in case of we catch bug, + -- so we would be able to reproduce it. + + -- End -- + + Pack_Size : Stream_Element_Offset; + Offset : Stream_Element_Offset; + + Filter : ZLib.Filter_Type; + + subtype Visible_Symbols + is Stream_Element range 16#20# .. 16#7E#; + + package Random_Elements is new + Ada.Numerics.Discrete_Random (Visible_Symbols); + + Gen : Random_Elements.Generator; + Period : constant Stream_Element_Offset := 200; + -- Period constant variable for random generator not to be very random. + -- Bigger period, harder random. 
+ + Read_Buffer : Stream_Element_Array (1 .. 2048); + Read_First : Stream_Element_Offset; + Read_Last : Stream_Element_Offset; + + procedure Reset; + + procedure Read + (Item : out Stream_Element_Array; + Last : out Stream_Element_Offset); + -- this procedure is for generic instantiation of + -- ZLib.Read + -- reading data from the File_In. + + procedure Read is new ZLib.Read + (Read, + Read_Buffer, + Rest_First => Read_First, + Rest_Last => Read_Last); + + ---------- + -- Read -- + ---------- + + procedure Read + (Item : out Stream_Element_Array; + Last : out Stream_Element_Offset) is + begin + Last := Stream_Element_Offset'Min + (Item'Last, + Item'First + File_Size - Offset); + + for J in Item'First .. Last loop + if J < Item'First + Period then + Item (J) := Random_Elements.Random (Gen); + else + Item (J) := Item (J - Period); + end if; + + Offset := Offset + 1; + end loop; + end Read; + + ----------- + -- Reset -- + ----------- + + procedure Reset is + begin + Random_Elements.Reset (Gen, Init_Random); + Pack_Size := 0; + Offset := 1; + Read_First := Read_Buffer'Last + 1; + Read_Last := Read_Buffer'Last; + end Reset; + +begin + Ada.Text_IO.Put_Line ("ZLib " & ZLib.Version); + + loop + for Level in ZLib.Compression_Level'Range loop + + Ada.Text_IO.Put ("Level =" + & ZLib.Compression_Level'Image (Level)); + + -- Deflate using generic instantiation. + + ZLib.Deflate_Init + (Filter, + Level, + Header => Header); + + Reset; + + Ada.Text_IO.Put + (Stream_Element_Offset'Image (File_Size) & " ->"); + + loop + declare + Buffer : Stream_Element_Array (1 .. 
1024); + Last : Stream_Element_Offset; + begin + Read (Filter, Buffer, Last); + + Pack_Size := Pack_Size + Last - Buffer'First + 1; + + exit when Last < Buffer'Last; + end; + end loop; + + Ada.Text_IO.Put_Line (Stream_Element_Offset'Image (Pack_Size)); + + ZLib.Close (Filter); + end loop; + + exit when not Continuous; + + File_Size := File_Size + 1; + end loop; +end Read; Added: external/zlib/contrib/ada/readme.txt ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/readme.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,65 @@ + ZLib for Ada thick binding (ZLib.Ada) + Release 1.3 + +ZLib.Ada is a thick binding interface to the popular ZLib data +compression library, available at http://www.gzip.org/zlib/. +It provides Ada-style access to the ZLib C library. + + + Here are the main changes since ZLib.Ada 1.2: + +- Attention: the ZLib.Read generic routine now has an initialization requirement + for the Read_Last parameter. It is slightly incompatible with the previous version, + but extends functionality: the new parameters Allow_Read_Some and + Flush can now be used. + +- Added Is_Open routines to ZLib and ZLib.Streams packages. + +- Add pragma Assert to check Stream_Element is 8 bit. + +- Fix extraction to buffer with exact known decompressed size. Error reported by + Steve Sangwine. + +- Fix definition of ULong (changed to unsigned_long), fix regression on 64-bit + computers. Patch provided by Pascal Obry. + +- Add Status_Error exception definition. + +- Add pragma Assertion that Ada.Streams.Stream_Element size is 8 bit. + + + How to build ZLib.Ada under GNAT + +You should have the ZLib library already built on your computer, before +building ZLib.Ada. 
Make the directory of ZLib.Ada sources current and +issue the command: + + gnatmake test -largs -L<directory where libz.a is> -lz + +Or use the GNAT project file build for GNAT 3.15 or later: + + gnatmake -Pzlib.gpr -L<directory where libz.a is> + + + How to build ZLib.Ada under Aonix ObjectAda for Win32 7.2.2 + +1. 
Make a project with all *.ads and *.adb files from the distribution. +2. Build the libz.a library from the ZLib C sources. +3. Rename libz.a to z.lib. +4. Add the library z.lib to the project. +5. Add the libc.lib library from the ObjectAda distribution to the project. +6. Build the executable using test.adb as a main procedure. + + + How to use ZLib.Ada + +The source files test.adb and read.adb are small demo programs that show +the main functionality of ZLib.Ada. + +The routines from the package specifications are commented. + + +Homepage: http://zlib-ada.sourceforge.net/ +Author: Dmitriy Anisimkov + +Contributors: Pascal Obry , Steve Sangwine Added: external/zlib/contrib/ada/test.adb ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/test.adb Tue Jan 3 07:42:59 2006 @@ -0,0 +1,463 @@ +---------------------------------------------------------------- +-- ZLib for Ada thick binding. -- +-- -- +-- Copyright (C) 2002-2003 Dmitriy Anisimkov -- +-- -- +-- Open source license information is in the zlib.ads file. -- +---------------------------------------------------------------- + +-- $Id: test.adb,v 1.17 2003/08/12 12:13:30 vagul Exp $ + +-- The program has a few aims. +-- 1. Test ZLib.Ada95 thick binding functionality. +-- 2. Show an example of using the main functionality of the ZLib.Ada95 binding. +-- 3. Building this program automatically compiles all ZLib.Ada95 packages under +-- the GNAT Ada95 compiler. 
+ +with ZLib.Streams; +with Ada.Streams.Stream_IO; +with Ada.Numerics.Discrete_Random; + +with Ada.Text_IO; + +with Ada.Calendar; + +procedure Test is + + use Ada.Streams; + use Stream_IO; + + ------------------------------------ + -- Test configuration parameters -- + ------------------------------------ + + File_Size : Count := 100_000; + Continuous : constant Boolean := False; + + Header : constant ZLib.Header_Type := ZLib.Default; + -- ZLib.None; + -- ZLib.Auto; + -- ZLib.GZip; + -- Do not use Header other then Default in ZLib versions 1.1.4 + -- and older. + + Strategy : constant ZLib.Strategy_Type := ZLib.Default_Strategy; + Init_Random : constant := 10; + + -- End -- + + In_File_Name : constant String := "testzlib.in"; + -- Name of the input file + + Z_File_Name : constant String := "testzlib.zlb"; + -- Name of the compressed file. + + Out_File_Name : constant String := "testzlib.out"; + -- Name of the decompressed file. + + File_In : File_Type; + File_Out : File_Type; + File_Back : File_Type; + File_Z : ZLib.Streams.Stream_Type; + + Filter : ZLib.Filter_Type; + + Time_Stamp : Ada.Calendar.Time; + + procedure Generate_File; + -- Generate file of spetsified size with some random data. + -- The random data is repeatable, for the good compression. + + procedure Compare_Streams + (Left, Right : in out Root_Stream_Type'Class); + -- The procedure compearing data in 2 streams. + -- It is for compare data before and after compression/decompression. + + procedure Compare_Files (Left, Right : String); + -- Compare files. Based on the Compare_Streams. + + procedure Copy_Streams + (Source, Target : in out Root_Stream_Type'Class; + Buffer_Size : in Stream_Element_Offset := 1024); + -- Copying data from one stream to another. It is for test stream + -- interface of the library. + + procedure Data_In + (Item : out Stream_Element_Array; + Last : out Stream_Element_Offset); + -- this procedure is for generic instantiation of + -- ZLib.Generic_Translate. 
+ -- reading data from the File_In. + + procedure Data_Out (Item : in Stream_Element_Array); + -- this procedure is for generic instantiation of + -- ZLib.Generic_Translate. + -- writing data to the File_Out. + + procedure Stamp; + -- Store the timestamp to the local variable. + + procedure Print_Statistic (Msg : String; Data_Size : ZLib.Count); + -- Print the time statistic with the message. + + procedure Translate is new ZLib.Generic_Translate + (Data_In => Data_In, + Data_Out => Data_Out); + -- This procedure is moving data from File_In to File_Out + -- with compression or decompression, depend on initialization of + -- Filter parameter. + + ------------------- + -- Compare_Files -- + ------------------- + + procedure Compare_Files (Left, Right : String) is + Left_File, Right_File : File_Type; + begin + Open (Left_File, In_File, Left); + Open (Right_File, In_File, Right); + Compare_Streams (Stream (Left_File).all, Stream (Right_File).all); + Close (Left_File); + Close (Right_File); + end Compare_Files; + + --------------------- + -- Compare_Streams -- + --------------------- + + procedure Compare_Streams + (Left, Right : in out Ada.Streams.Root_Stream_Type'Class) + is + Left_Buffer, Right_Buffer : Stream_Element_Array (0 .. 16#FFF#); + Left_Last, Right_Last : Stream_Element_Offset; + begin + loop + Read (Left, Left_Buffer, Left_Last); + Read (Right, Right_Buffer, Right_Last); + + if Left_Last /= Right_Last then + Ada.Text_IO.Put_Line ("Compare error :" + & Stream_Element_Offset'Image (Left_Last) + & " /= " + & Stream_Element_Offset'Image (Right_Last)); + + raise Constraint_Error; + + elsif Left_Buffer (0 .. Left_Last) + /= Right_Buffer (0 .. 
Right_Last) + then + Ada.Text_IO.Put_Line ("ERROR: IN and OUT files is not equal."); + raise Constraint_Error; + + end if; + + exit when Left_Last < Left_Buffer'Last; + end loop; + end Compare_Streams; + + ------------------ + -- Copy_Streams -- + ------------------ + + procedure Copy_Streams + (Source, Target : in out Ada.Streams.Root_Stream_Type'Class; + Buffer_Size : in Stream_Element_Offset := 1024) + is + Buffer : Stream_Element_Array (1 .. Buffer_Size); + Last : Stream_Element_Offset; + begin + loop + Read (Source, Buffer, Last); + Write (Target, Buffer (1 .. Last)); + + exit when Last < Buffer'Last; + end loop; + end Copy_Streams; + + ------------- + -- Data_In -- + ------------- + + procedure Data_In + (Item : out Stream_Element_Array; + Last : out Stream_Element_Offset) is + begin + Read (File_In, Item, Last); + end Data_In; + + -------------- + -- Data_Out -- + -------------- + + procedure Data_Out (Item : in Stream_Element_Array) is + begin + Write (File_Out, Item); + end Data_Out; + + ------------------- + -- Generate_File -- + ------------------- + + procedure Generate_File is + subtype Visible_Symbols is Stream_Element range 16#20# .. 16#7E#; + + package Random_Elements is + new Ada.Numerics.Discrete_Random (Visible_Symbols); + + Gen : Random_Elements.Generator; + Buffer : Stream_Element_Array := (1 .. 77 => 16#20#) & 10; + + Buffer_Count : constant Count := File_Size / Buffer'Length; + -- Number of same buffers in the packet. + + Density : constant Count := 30; -- from 0 to Buffer'Length - 2; + + procedure Fill_Buffer (J, D : in Count); + -- Change the part of the buffer. + + ----------------- + -- Fill_Buffer -- + ----------------- + + procedure Fill_Buffer (J, D : in Count) is + begin + for K in 0 .. 
D loop + Buffer + (Stream_Element_Offset ((J + K) mod (Buffer'Length - 1) + 1)) + := Random_Elements.Random (Gen); + + end loop; + end Fill_Buffer; + + begin + Random_Elements.Reset (Gen, Init_Random); + + Create (File_In, Out_File, In_File_Name); + + Fill_Buffer (1, Buffer'Length - 2); + + for J in 1 .. Buffer_Count loop + Write (File_In, Buffer); + + Fill_Buffer (J, Density); + end loop; + + -- fill remain size. + + Write + (File_In, + Buffer + (1 .. Stream_Element_Offset + (File_Size - Buffer'Length * Buffer_Count))); + + Flush (File_In); + Close (File_In); + end Generate_File; + + --------------------- + -- Print_Statistic -- + --------------------- + + procedure Print_Statistic (Msg : String; Data_Size : ZLib.Count) is + use Ada.Calendar; + use Ada.Text_IO; + + package Count_IO is new Integer_IO (ZLib.Count); + + Curr_Dur : Duration := Clock - Time_Stamp; + begin + Put (Msg); + + Set_Col (20); + Ada.Text_IO.Put ("size ="); + + Count_IO.Put + (Data_Size, + Width => Stream_IO.Count'Image (File_Size)'Length); + + Put_Line (" duration =" & Duration'Image (Curr_Dur)); + end Print_Statistic; + + ----------- + -- Stamp -- + ----------- + + procedure Stamp is + begin + Time_Stamp := Ada.Calendar.Clock; + end Stamp; + +begin + Ada.Text_IO.Put_Line ("ZLib " & ZLib.Version); + + loop + Generate_File; + + for Level in ZLib.Compression_Level'Range loop + + Ada.Text_IO.Put_Line ("Level =" + & ZLib.Compression_Level'Image (Level)); + + -- Test generic interface. + Open (File_In, In_File, In_File_Name); + Create (File_Out, Out_File, Z_File_Name); + + Stamp; + + -- Deflate using generic instantiation. 
+ + ZLib.Deflate_Init + (Filter => Filter, + Level => Level, + Strategy => Strategy, + Header => Header); + + Translate (Filter); + Print_Statistic ("Generic compress", ZLib.Total_Out (Filter)); + ZLib.Close (Filter); + + Close (File_In); + Close (File_Out); + + Open (File_In, In_File, Z_File_Name); + Create (File_Out, Out_File, Out_File_Name); + + Stamp; + + -- Inflate using generic instantiation. + + ZLib.Inflate_Init (Filter, Header => Header); + + Translate (Filter); + Print_Statistic ("Generic decompress", ZLib.Total_Out (Filter)); + + ZLib.Close (Filter); + + Close (File_In); + Close (File_Out); + + Compare_Files (In_File_Name, Out_File_Name); + + -- Test stream interface. + + -- Compress to the back stream. + + Open (File_In, In_File, In_File_Name); + Create (File_Back, Out_File, Z_File_Name); + + Stamp; + + ZLib.Streams.Create + (Stream => File_Z, + Mode => ZLib.Streams.Out_Stream, + Back => ZLib.Streams.Stream_Access + (Stream (File_Back)), + Back_Compressed => True, + Level => Level, + Strategy => Strategy, + Header => Header); + + Copy_Streams + (Source => Stream (File_In).all, + Target => File_Z); + + -- Flushing internal buffers to the back stream. + + ZLib.Streams.Flush (File_Z, ZLib.Finish); + + Print_Statistic ("Write compress", + ZLib.Streams.Write_Total_Out (File_Z)); + + ZLib.Streams.Close (File_Z); + + Close (File_In); + Close (File_Back); + + -- Compare reading from original file and from + -- decompression stream. + + Open (File_In, In_File, In_File_Name); + Open (File_Back, In_File, Z_File_Name); + + ZLib.Streams.Create + (Stream => File_Z, + Mode => ZLib.Streams.In_Stream, + Back => ZLib.Streams.Stream_Access + (Stream (File_Back)), + Back_Compressed => True, + Header => Header); + + Stamp; + Compare_Streams (Stream (File_In).all, File_Z); + + Print_Statistic ("Read decompress", + ZLib.Streams.Read_Total_Out (File_Z)); + + ZLib.Streams.Close (File_Z); + Close (File_In); + Close (File_Back); + + -- Compress by reading from compression stream. 
+ + Open (File_Back, In_File, In_File_Name); + Create (File_Out, Out_File, Z_File_Name); + + ZLib.Streams.Create + (Stream => File_Z, + Mode => ZLib.Streams.In_Stream, + Back => ZLib.Streams.Stream_Access + (Stream (File_Back)), + Back_Compressed => False, + Level => Level, + Strategy => Strategy, + Header => Header); + + Stamp; + Copy_Streams + (Source => File_Z, + Target => Stream (File_Out).all); + + Print_Statistic ("Read compress", + ZLib.Streams.Read_Total_Out (File_Z)); + + ZLib.Streams.Close (File_Z); + + Close (File_Out); + Close (File_Back); + + -- Decompress to decompression stream. + + Open (File_In, In_File, Z_File_Name); + Create (File_Back, Out_File, Out_File_Name); + + ZLib.Streams.Create + (Stream => File_Z, + Mode => ZLib.Streams.Out_Stream, + Back => ZLib.Streams.Stream_Access + (Stream (File_Back)), + Back_Compressed => False, + Header => Header); + + Stamp; + + Copy_Streams + (Source => Stream (File_In).all, + Target => File_Z); + + Print_Statistic ("Write decompress", + ZLib.Streams.Write_Total_Out (File_Z)); + + ZLib.Streams.Close (File_Z); + Close (File_In); + Close (File_Back); + + Compare_Files (In_File_Name, Out_File_Name); + end loop; + + Ada.Text_IO.Put_Line (Count'Image (File_Size) & " Ok."); + + exit when not Continuous; + + File_Size := File_Size + 1; + end loop; +end Test; Added: external/zlib/contrib/ada/zlib-streams.adb ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/zlib-streams.adb Tue Jan 3 07:42:59 2006 @@ -0,0 +1,225 @@ +---------------------------------------------------------------- +-- ZLib for Ada thick binding. -- +-- -- +-- Copyright (C) 2002-2003 Dmitriy Anisimkov -- +-- -- +-- Open source license information is in the zlib.ads file. 
-- +---------------------------------------------------------------- + +-- $Id: zlib-streams.adb,v 1.10 2004/05/31 10:53:40 vagul Exp $ + +with Ada.Unchecked_Deallocation; + +package body ZLib.Streams is + + ----------- + -- Close -- + ----------- + + procedure Close (Stream : in out Stream_Type) is + procedure Free is new Ada.Unchecked_Deallocation + (Stream_Element_Array, Buffer_Access); + begin + if Stream.Mode = Out_Stream or Stream.Mode = Duplex then + -- We should flush the data written by the writer. + + Flush (Stream, Finish); + + Close (Stream.Writer); + end if; + + if Stream.Mode = In_Stream or Stream.Mode = Duplex then + Close (Stream.Reader); + Free (Stream.Buffer); + end if; + end Close; + + ------------ + -- Create -- + ------------ + + procedure Create + (Stream : out Stream_Type; + Mode : in Stream_Mode; + Back : in Stream_Access; + Back_Compressed : in Boolean; + Level : in Compression_Level := Default_Compression; + Strategy : in Strategy_Type := Default_Strategy; + Header : in Header_Type := Default; + Read_Buffer_Size : in Ada.Streams.Stream_Element_Offset + := Default_Buffer_Size; + Write_Buffer_Size : in Ada.Streams.Stream_Element_Offset + := Default_Buffer_Size) + is + + subtype Buffer_Subtype is Stream_Element_Array (1 .. 
Read_Buffer_Size); + + procedure Init_Filter + (Filter : in out Filter_Type; + Compress : in Boolean); + + ----------------- + -- Init_Filter -- + ----------------- + + procedure Init_Filter + (Filter : in out Filter_Type; + Compress : in Boolean) is + begin + if Compress then + Deflate_Init + (Filter, Level, Strategy, Header => Header); + else + Inflate_Init (Filter, Header => Header); + end if; + end Init_Filter; + + begin + Stream.Back := Back; + Stream.Mode := Mode; + + if Mode = Out_Stream or Mode = Duplex then + Init_Filter (Stream.Writer, Back_Compressed); + Stream.Buffer_Size := Write_Buffer_Size; + else + Stream.Buffer_Size := 0; + end if; + + if Mode = In_Stream or Mode = Duplex then + Init_Filter (Stream.Reader, not Back_Compressed); + + Stream.Buffer := new Buffer_Subtype; + Stream.Rest_First := Stream.Buffer'Last + 1; + Stream.Rest_Last := Stream.Buffer'Last; + end if; + end Create; + + ----------- + -- Flush -- + ----------- + + procedure Flush + (Stream : in out Stream_Type; + Mode : in Flush_Mode := Sync_Flush) + is + Buffer : Stream_Element_Array (1 .. Stream.Buffer_Size); + Last : Stream_Element_Offset; + begin + loop + Flush (Stream.Writer, Buffer, Last, Mode); + + Ada.Streams.Write (Stream.Back.all, Buffer (1 .. 
Last)); + + exit when Last < Buffer'Last; + end loop; + end Flush; + + ------------- + -- Is_Open -- + ------------- + + function Is_Open (Stream : Stream_Type) return Boolean is + begin + return Is_Open (Stream.Reader) or else Is_Open (Stream.Writer); + end Is_Open; + + ---------- + -- Read -- + ---------- + + procedure Read + (Stream : in out Stream_Type; + Item : out Stream_Element_Array; + Last : out Stream_Element_Offset) + is + + procedure Read + (Item : out Stream_Element_Array; + Last : out Stream_Element_Offset); + + ---------- + -- Read -- + ---------- + + procedure Read + (Item : out Stream_Element_Array; + Last : out Stream_Element_Offset) is + begin + Ada.Streams.Read (Stream.Back.all, Item, Last); + end Read; + + procedure Read is new ZLib.Read + (Read => Read, + Buffer => Stream.Buffer.all, + Rest_First => Stream.Rest_First, + Rest_Last => Stream.Rest_Last); + + begin + Read (Stream.Reader, Item, Last); + end Read; + + ------------------- + -- Read_Total_In -- + ------------------- + + function Read_Total_In (Stream : in Stream_Type) return Count is + begin + return Total_In (Stream.Reader); + end Read_Total_In; + + -------------------- + -- Read_Total_Out -- + -------------------- + + function Read_Total_Out (Stream : in Stream_Type) return Count is + begin + return Total_Out (Stream.Reader); + end Read_Total_Out; + + ----------- + -- Write -- + ----------- + + procedure Write + (Stream : in out Stream_Type; + Item : in Stream_Element_Array) + is + + procedure Write (Item : in Stream_Element_Array); + + ----------- + -- Write -- + ----------- + + procedure Write (Item : in Stream_Element_Array) is + begin + Ada.Streams.Write (Stream.Back.all, Item); + end Write; + + procedure Write is new ZLib.Write + (Write => Write, + Buffer_Size => Stream.Buffer_Size); + + begin + Write (Stream.Writer, Item, No_Flush); + end Write; + + -------------------- + -- Write_Total_In -- + -------------------- + + function Write_Total_In (Stream : in Stream_Type) return 
Count is + begin + return Total_In (Stream.Writer); + end Write_Total_In; + + --------------------- + -- Write_Total_Out -- + --------------------- + + function Write_Total_Out (Stream : in Stream_Type) return Count is + begin + return Total_Out (Stream.Writer); + end Write_Total_Out; + +end ZLib.Streams; Added: external/zlib/contrib/ada/zlib-streams.ads ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/zlib-streams.ads Tue Jan 3 07:42:59 2006 @@ -0,0 +1,114 @@ +---------------------------------------------------------------- +-- ZLib for Ada thick binding. -- +-- -- +-- Copyright (C) 2002-2003 Dmitriy Anisimkov -- +-- -- +-- Open source license information is in the zlib.ads file. -- +---------------------------------------------------------------- + +-- $Id: zlib-streams.ads,v 1.12 2004/05/31 10:53:40 vagul Exp $ + +package ZLib.Streams is + + type Stream_Mode is (In_Stream, Out_Stream, Duplex); + + type Stream_Access is access all Ada.Streams.Root_Stream_Type'Class; + + type Stream_Type is + new Ada.Streams.Root_Stream_Type with private; + + procedure Read + (Stream : in out Stream_Type; + Item : out Ada.Streams.Stream_Element_Array; + Last : out Ada.Streams.Stream_Element_Offset); + + procedure Write + (Stream : in out Stream_Type; + Item : in Ada.Streams.Stream_Element_Array); + + procedure Flush + (Stream : in out Stream_Type; + Mode : in Flush_Mode := Sync_Flush); + -- Flush the written data to the back stream, + -- all data placed in the compressor is flushed to the Back stream. + -- Should not be used unless necessary, because it decreases + -- compression. + + function Read_Total_In (Stream : in Stream_Type) return Count; + pragma Inline (Read_Total_In); + -- Return total number of bytes read from back stream so far. + + function Read_Total_Out (Stream : in Stream_Type) return Count; + pragma Inline (Read_Total_Out); + -- Return total number of bytes read so far. 
+ + function Write_Total_In (Stream : in Stream_Type) return Count; + pragma Inline (Write_Total_In); + -- Return total number of bytes written so far. + + function Write_Total_Out (Stream : in Stream_Type) return Count; + pragma Inline (Write_Total_Out); + -- Return total number of bytes written to the back stream. + + procedure Create + (Stream : out Stream_Type; + Mode : in Stream_Mode; + Back : in Stream_Access; + Back_Compressed : in Boolean; + Level : in Compression_Level := Default_Compression; + Strategy : in Strategy_Type := Default_Strategy; + Header : in Header_Type := Default; + Read_Buffer_Size : in Ada.Streams.Stream_Element_Offset + := Default_Buffer_Size; + Write_Buffer_Size : in Ada.Streams.Stream_Element_Offset + := Default_Buffer_Size); + -- Create the Compression/Decompression stream. + -- If mode is In_Stream then Write operation is disabled. + -- If mode is Out_Stream then Read operation is disabled. + + -- If Back_Compressed is true then + -- Data written to the Stream is compressed to the Back stream + -- and data read from the Stream is decompressed data from the Back stream. + + -- If Back_Compressed is false then + -- Data written to the Stream is decompressed to the Back stream + -- and data read from the Stream is compressed data from the Back stream. + + -- !!! When the Need_Header is False ZLib-Ada is using undocumented + -- ZLib 1.1.4 functionality to avoid creating/waiting for ZLib headers. + + function Is_Open (Stream : Stream_Type) return Boolean; + + procedure Close (Stream : in out Stream_Type); + +private + + use Ada.Streams; + + type Buffer_Access is access all Stream_Element_Array; + + type Stream_Type + is new Root_Stream_Type with + record + Mode : Stream_Mode; + + Buffer : Buffer_Access; + Rest_First : Stream_Element_Offset; + Rest_Last : Stream_Element_Offset; + -- Buffer for Read operation. 
+ -- We need to have this buffer in the record + -- becouse not all read data from back stream + -- could be processed during the read operation. + + Buffer_Size : Stream_Element_Offset; + -- Buffer size for write operation. + -- We do not need to have this buffer + -- in the record becouse all data could be + -- processed in the write operation. + + Back : Stream_Access; + Reader : Filter_Type; + Writer : Filter_Type; + end record; + +end ZLib.Streams; Added: external/zlib/contrib/ada/zlib-thin.adb ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/zlib-thin.adb Tue Jan 3 07:42:59 2006 @@ -0,0 +1,141 @@ +---------------------------------------------------------------- +-- ZLib for Ada thick binding. -- +-- -- +-- Copyright (C) 2002-2003 Dmitriy Anisimkov -- +-- -- +-- Open source license information is in the zlib.ads file. -- +---------------------------------------------------------------- + +-- $Id: zlib-thin.adb,v 1.8 2003/12/14 18:27:31 vagul Exp $ + +package body ZLib.Thin is + + ZLIB_VERSION : constant Chars_Ptr := zlibVersion; + + Z_Stream_Size : constant Int := Z_Stream'Size / System.Storage_Unit; + + -------------- + -- Avail_In -- + -------------- + + function Avail_In (Strm : in Z_Stream) return UInt is + begin + return Strm.Avail_In; + end Avail_In; + + --------------- + -- Avail_Out -- + --------------- + + function Avail_Out (Strm : in Z_Stream) return UInt is + begin + return Strm.Avail_Out; + end Avail_Out; + + ------------------ + -- Deflate_Init -- + ------------------ + + function Deflate_Init + (strm : Z_Streamp; + level : Int; + method : Int; + windowBits : Int; + memLevel : Int; + strategy : Int) + return Int is + begin + return deflateInit2 + (strm, + level, + method, + windowBits, + memLevel, + strategy, + ZLIB_VERSION, + Z_Stream_Size); + end Deflate_Init; + + ------------------ + -- Inflate_Init -- + ------------------ + + function Inflate_Init (strm : Z_Streamp; 
windowBits : Int) return Int is + begin + return inflateInit2 (strm, windowBits, ZLIB_VERSION, Z_Stream_Size); + end Inflate_Init; + + ------------------------ + -- Last_Error_Message -- + ------------------------ + + function Last_Error_Message (Strm : in Z_Stream) return String is + use Interfaces.C.Strings; + begin + if Strm.msg = Null_Ptr then + return ""; + else + return Value (Strm.msg); + end if; + end Last_Error_Message; + + ------------ + -- Set_In -- + ------------ + + procedure Set_In + (Strm : in out Z_Stream; + Buffer : in Voidp; + Size : in UInt) is + begin + Strm.Next_In := Buffer; + Strm.Avail_In := Size; + end Set_In; + + ------------------ + -- Set_Mem_Func -- + ------------------ + + procedure Set_Mem_Func + (Strm : in out Z_Stream; + Opaque : in Voidp; + Alloc : in alloc_func; + Free : in free_func) is + begin + Strm.opaque := Opaque; + Strm.zalloc := Alloc; + Strm.zfree := Free; + end Set_Mem_Func; + + ------------- + -- Set_Out -- + ------------- + + procedure Set_Out + (Strm : in out Z_Stream; + Buffer : in Voidp; + Size : in UInt) is + begin + Strm.Next_Out := Buffer; + Strm.Avail_Out := Size; + end Set_Out; + + -------------- + -- Total_In -- + -------------- + + function Total_In (Strm : in Z_Stream) return ULong is + begin + return Strm.Total_In; + end Total_In; + + --------------- + -- Total_Out -- + --------------- + + function Total_Out (Strm : in Z_Stream) return ULong is + begin + return Strm.Total_Out; + end Total_Out; + +end ZLib.Thin; Added: external/zlib/contrib/ada/zlib-thin.ads ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/zlib-thin.ads Tue Jan 3 07:42:59 2006 @@ -0,0 +1,450 @@ +---------------------------------------------------------------- +-- ZLib for Ada thick binding. -- +-- -- +-- Copyright (C) 2002-2003 Dmitriy Anisimkov -- +-- -- +-- Open source license information is in the zlib.ads file. 
-- +---------------------------------------------------------------- + +-- $Id: zlib-thin.ads,v 1.11 2004/07/23 06:33:11 vagul Exp $ + +with Interfaces.C.Strings; + +with System; + +private package ZLib.Thin is + + -- From zconf.h + + MAX_MEM_LEVEL : constant := 9; -- zconf.h:105 + -- zconf.h:105 + MAX_WBITS : constant := 15; -- zconf.h:115 + -- 32K LZ77 window + -- zconf.h:115 + SEEK_SET : constant := 8#0000#; -- zconf.h:244 + -- Seek from beginning of file. + -- zconf.h:244 + SEEK_CUR : constant := 1; -- zconf.h:245 + -- Seek from current position. + -- zconf.h:245 + SEEK_END : constant := 2; -- zconf.h:246 + -- Set file pointer to EOF plus "offset" + -- zconf.h:246 + + type Byte is new Interfaces.C.unsigned_char; -- 8 bits + -- zconf.h:214 + type UInt is new Interfaces.C.unsigned; -- 16 bits or more + -- zconf.h:216 + type Int is new Interfaces.C.int; + + type ULong is new Interfaces.C.unsigned_long; -- 32 bits or more + -- zconf.h:217 + subtype Chars_Ptr is Interfaces.C.Strings.chars_ptr; + + type ULong_Access is access ULong; + type Int_Access is access Int; + + subtype Voidp is System.Address; -- zconf.h:232 + + subtype Byte_Access is Voidp; + + Nul : constant Voidp := System.Null_Address; + -- end from zconf + + Z_NO_FLUSH : constant := 8#0000#; -- zlib.h:125 + -- zlib.h:125 + Z_PARTIAL_FLUSH : constant := 1; -- zlib.h:126 + -- will be removed, use + -- Z_SYNC_FLUSH instead + -- zlib.h:126 + Z_SYNC_FLUSH : constant := 2; -- zlib.h:127 + -- zlib.h:127 + Z_FULL_FLUSH : constant := 3; -- zlib.h:128 + -- zlib.h:128 + Z_FINISH : constant := 4; -- zlib.h:129 + -- zlib.h:129 + Z_OK : constant := 8#0000#; -- zlib.h:132 + -- zlib.h:132 + Z_STREAM_END : constant := 1; -- zlib.h:133 + -- zlib.h:133 + Z_NEED_DICT : constant := 2; -- zlib.h:134 + -- zlib.h:134 + Z_ERRNO : constant := -1; -- zlib.h:135 + -- zlib.h:135 + Z_STREAM_ERROR : constant := -2; -- zlib.h:136 + -- zlib.h:136 + Z_DATA_ERROR : constant := -3; -- zlib.h:137 + -- zlib.h:137 + Z_MEM_ERROR : constant := 
-4; -- zlib.h:138 + -- zlib.h:138 + Z_BUF_ERROR : constant := -5; -- zlib.h:139 + -- zlib.h:139 + Z_VERSION_ERROR : constant := -6; -- zlib.h:140 + -- zlib.h:140 + Z_NO_COMPRESSION : constant := 8#0000#; -- zlib.h:145 + -- zlib.h:145 + Z_BEST_SPEED : constant := 1; -- zlib.h:146 + -- zlib.h:146 + Z_BEST_COMPRESSION : constant := 9; -- zlib.h:147 + -- zlib.h:147 + Z_DEFAULT_COMPRESSION : constant := -1; -- zlib.h:148 + -- zlib.h:148 + Z_FILTERED : constant := 1; -- zlib.h:151 + -- zlib.h:151 + Z_HUFFMAN_ONLY : constant := 2; -- zlib.h:152 + -- zlib.h:152 + Z_DEFAULT_STRATEGY : constant := 8#0000#; -- zlib.h:153 + -- zlib.h:153 + Z_BINARY : constant := 8#0000#; -- zlib.h:156 + -- zlib.h:156 + Z_ASCII : constant := 1; -- zlib.h:157 + -- zlib.h:157 + Z_UNKNOWN : constant := 2; -- zlib.h:158 + -- zlib.h:158 + Z_DEFLATED : constant := 8; -- zlib.h:161 + -- zlib.h:161 + Z_NULL : constant := 8#0000#; -- zlib.h:164 + -- for initializing zalloc, zfree, opaque + -- zlib.h:164 + type gzFile is new Voidp; -- zlib.h:646 + + type Z_Stream is private; + + type Z_Streamp is access all Z_Stream; -- zlib.h:89 + + type alloc_func is access function + (Opaque : Voidp; + Items : UInt; + Size : UInt) + return Voidp; -- zlib.h:63 + + type free_func is access procedure (opaque : Voidp; address : Voidp); + + function zlibVersion return Chars_Ptr; + + function Deflate (strm : Z_Streamp; flush : Int) return Int; + + function DeflateEnd (strm : Z_Streamp) return Int; + + function Inflate (strm : Z_Streamp; flush : Int) return Int; + + function InflateEnd (strm : Z_Streamp) return Int; + + function deflateSetDictionary + (strm : Z_Streamp; + dictionary : Byte_Access; + dictLength : UInt) + return Int; + + function deflateCopy (dest : Z_Streamp; source : Z_Streamp) return Int; + -- zlib.h:478 + + function deflateReset (strm : Z_Streamp) return Int; -- zlib.h:495 + + function deflateParams + (strm : Z_Streamp; + level : Int; + strategy : Int) + return Int; -- zlib.h:506 + + function 
inflateSetDictionary + (strm : Z_Streamp; + dictionary : Byte_Access; + dictLength : UInt) + return Int; -- zlib.h:548 + + function inflateSync (strm : Z_Streamp) return Int; -- zlib.h:565 + + function inflateReset (strm : Z_Streamp) return Int; -- zlib.h:580 + + function compress + (dest : Byte_Access; + destLen : ULong_Access; + source : Byte_Access; + sourceLen : ULong) + return Int; -- zlib.h:601 + + function compress2 + (dest : Byte_Access; + destLen : ULong_Access; + source : Byte_Access; + sourceLen : ULong; + level : Int) + return Int; -- zlib.h:615 + + function uncompress + (dest : Byte_Access; + destLen : ULong_Access; + source : Byte_Access; + sourceLen : ULong) + return Int; + + function gzopen (path : Chars_Ptr; mode : Chars_Ptr) return gzFile; + + function gzdopen (fd : Int; mode : Chars_Ptr) return gzFile; + + function gzsetparams + (file : gzFile; + level : Int; + strategy : Int) + return Int; + + function gzread + (file : gzFile; + buf : Voidp; + len : UInt) + return Int; + + function gzwrite + (file : in gzFile; + buf : in Voidp; + len : in UInt) + return Int; + + function gzprintf (file : in gzFile; format : in Chars_Ptr) return Int; + + function gzputs (file : in gzFile; s : in Chars_Ptr) return Int; + + function gzgets + (file : gzFile; + buf : Chars_Ptr; + len : Int) + return Chars_Ptr; + + function gzputc (file : gzFile; char : Int) return Int; + + function gzgetc (file : gzFile) return Int; + + function gzflush (file : gzFile; flush : Int) return Int; + + function gzseek + (file : gzFile; + offset : Int; + whence : Int) + return Int; + + function gzrewind (file : gzFile) return Int; + + function gztell (file : gzFile) return Int; + + function gzeof (file : gzFile) return Int; + + function gzclose (file : gzFile) return Int; + + function gzerror (file : gzFile; errnum : Int_Access) return Chars_Ptr; + + function adler32 + (adler : ULong; + buf : Byte_Access; + len : UInt) + return ULong; + + function crc32 + (crc : ULong; + buf : Byte_Access; 
+ len : UInt) + return ULong; + + function deflateInit + (strm : Z_Streamp; + level : Int; + version : Chars_Ptr; + stream_size : Int) + return Int; + + function deflateInit2 + (strm : Z_Streamp; + level : Int; + method : Int; + windowBits : Int; + memLevel : Int; + strategy : Int; + version : Chars_Ptr; + stream_size : Int) + return Int; + + function Deflate_Init + (strm : Z_Streamp; + level : Int; + method : Int; + windowBits : Int; + memLevel : Int; + strategy : Int) + return Int; + pragma Inline (Deflate_Init); + + function inflateInit + (strm : Z_Streamp; + version : Chars_Ptr; + stream_size : Int) + return Int; + + function inflateInit2 + (strm : in Z_Streamp; + windowBits : in Int; + version : in Chars_Ptr; + stream_size : in Int) + return Int; + + function inflateBackInit + (strm : in Z_Streamp; + windowBits : in Int; + window : in Byte_Access; + version : in Chars_Ptr; + stream_size : in Int) + return Int; + -- Size of window have to be 2**windowBits. + + function Inflate_Init (strm : Z_Streamp; windowBits : Int) return Int; + pragma Inline (Inflate_Init); + + function zError (err : Int) return Chars_Ptr; + + function inflateSyncPoint (z : Z_Streamp) return Int; + + function get_crc_table return ULong_Access; + + -- Interface to the available fields of the z_stream structure. + -- The application must update next_in and avail_in when avail_in has + -- dropped to zero. It must update next_out and avail_out when avail_out + -- has dropped to zero. The application must initialize zalloc, zfree and + -- opaque before calling the init function. 
+ + procedure Set_In + (Strm : in out Z_Stream; + Buffer : in Voidp; + Size : in UInt); + pragma Inline (Set_In); + + procedure Set_Out + (Strm : in out Z_Stream; + Buffer : in Voidp; + Size : in UInt); + pragma Inline (Set_Out); + + procedure Set_Mem_Func + (Strm : in out Z_Stream; + Opaque : in Voidp; + Alloc : in alloc_func; + Free : in free_func); + pragma Inline (Set_Mem_Func); + + function Last_Error_Message (Strm : in Z_Stream) return String; + pragma Inline (Last_Error_Message); + + function Avail_Out (Strm : in Z_Stream) return UInt; + pragma Inline (Avail_Out); + + function Avail_In (Strm : in Z_Stream) return UInt; + pragma Inline (Avail_In); + + function Total_In (Strm : in Z_Stream) return ULong; + pragma Inline (Total_In); + + function Total_Out (Strm : in Z_Stream) return ULong; + pragma Inline (Total_Out); + + function inflateCopy + (dest : in Z_Streamp; + Source : in Z_Streamp) + return Int; + + function compressBound (Source_Len : in ULong) return ULong; + + function deflateBound + (Strm : in Z_Streamp; + Source_Len : in ULong) + return ULong; + + function gzungetc (C : in Int; File : in gzFile) return Int; + + function zlibCompileFlags return ULong; + +private + + type Z_Stream is record -- zlib.h:68 + Next_In : Voidp := Nul; -- next input byte + Avail_In : UInt := 0; -- number of bytes available at next_in + Total_In : ULong := 0; -- total nb of input bytes read so far + Next_Out : Voidp := Nul; -- next output byte should be put there + Avail_Out : UInt := 0; -- remaining free space at next_out + Total_Out : ULong := 0; -- total nb of bytes output so far + msg : Chars_Ptr; -- last error message, NULL if no error + state : Voidp; -- not visible by applications + zalloc : alloc_func := null; -- used to allocate the internal state + zfree : free_func := null; -- used to free the internal state + opaque : Voidp; -- private data object passed to + -- zalloc and zfree + data_type : Int; -- best guess about the data type: + -- ascii or binary + adler : 
ULong; -- adler32 value of the uncompressed + -- data + reserved : ULong; -- reserved for future use + end record; + + pragma Convention (C, Z_Stream); + + pragma Import (C, zlibVersion, "zlibVersion"); + pragma Import (C, Deflate, "deflate"); + pragma Import (C, DeflateEnd, "deflateEnd"); + pragma Import (C, Inflate, "inflate"); + pragma Import (C, InflateEnd, "inflateEnd"); + pragma Import (C, deflateSetDictionary, "deflateSetDictionary"); + pragma Import (C, deflateCopy, "deflateCopy"); + pragma Import (C, deflateReset, "deflateReset"); + pragma Import (C, deflateParams, "deflateParams"); + pragma Import (C, inflateSetDictionary, "inflateSetDictionary"); + pragma Import (C, inflateSync, "inflateSync"); + pragma Import (C, inflateReset, "inflateReset"); + pragma Import (C, compress, "compress"); + pragma Import (C, compress2, "compress2"); + pragma Import (C, uncompress, "uncompress"); + pragma Import (C, gzopen, "gzopen"); + pragma Import (C, gzdopen, "gzdopen"); + pragma Import (C, gzsetparams, "gzsetparams"); + pragma Import (C, gzread, "gzread"); + pragma Import (C, gzwrite, "gzwrite"); + pragma Import (C, gzprintf, "gzprintf"); + pragma Import (C, gzputs, "gzputs"); + pragma Import (C, gzgets, "gzgets"); + pragma Import (C, gzputc, "gzputc"); + pragma Import (C, gzgetc, "gzgetc"); + pragma Import (C, gzflush, "gzflush"); + pragma Import (C, gzseek, "gzseek"); + pragma Import (C, gzrewind, "gzrewind"); + pragma Import (C, gztell, "gztell"); + pragma Import (C, gzeof, "gzeof"); + pragma Import (C, gzclose, "gzclose"); + pragma Import (C, gzerror, "gzerror"); + pragma Import (C, adler32, "adler32"); + pragma Import (C, crc32, "crc32"); + pragma Import (C, deflateInit, "deflateInit_"); + pragma Import (C, inflateInit, "inflateInit_"); + pragma Import (C, deflateInit2, "deflateInit2_"); + pragma Import (C, inflateInit2, "inflateInit2_"); + pragma Import (C, zError, "zError"); + pragma Import (C, inflateSyncPoint, "inflateSyncPoint"); + pragma Import (C, 
get_crc_table, "get_crc_table"); + + -- since zlib 1.2.0: + + pragma Import (C, inflateCopy, "inflateCopy"); + pragma Import (C, compressBound, "compressBound"); + pragma Import (C, deflateBound, "deflateBound"); + pragma Import (C, gzungetc, "gzungetc"); + pragma Import (C, zlibCompileFlags, "zlibCompileFlags"); + + pragma Import (C, inflateBackInit, "inflateBackInit_"); + + -- I stopped binding the inflateBack routines, because I realized that + -- it does not support zlib and gzip headers for now, and has no + -- symmetric deflateBack routines. + -- ZLib-Ada is symmetric regarding deflate/inflate data transformation + -- and has a similar generic callback interface for the + -- deflate/inflate transformation based on the regular Deflate/Inflate + -- routines. + + -- pragma Import (C, inflateBack, "inflateBack"); + -- pragma Import (C, inflateBackEnd, "inflateBackEnd"); + +end ZLib.Thin; Added: external/zlib/contrib/ada/zlib.adb ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/zlib.adb Tue Jan 3 07:42:59 2006 @@ -0,0 +1,701 @@ +---------------------------------------------------------------- +-- ZLib for Ada thick binding. -- +-- -- +-- Copyright (C) 2002-2004 Dmitriy Anisimkov -- +-- -- +-- Open source license information is in the zlib.ads file.
-- +---------------------------------------------------------------- + +-- $Id: zlib.adb,v 1.31 2004/09/06 06:53:19 vagul Exp $ + +with Ada.Exceptions; +with Ada.Unchecked_Conversion; +with Ada.Unchecked_Deallocation; + +with Interfaces.C.Strings; + +with ZLib.Thin; + +package body ZLib is + + use type Thin.Int; + + type Z_Stream is new Thin.Z_Stream; + + type Return_Code_Enum is + (OK, + STREAM_END, + NEED_DICT, + ERRNO, + STREAM_ERROR, + DATA_ERROR, + MEM_ERROR, + BUF_ERROR, + VERSION_ERROR); + + type Flate_Step_Function is access + function (Strm : in Thin.Z_Streamp; Flush : in Thin.Int) return Thin.Int; + pragma Convention (C, Flate_Step_Function); + + type Flate_End_Function is access + function (Ctrm : in Thin.Z_Streamp) return Thin.Int; + pragma Convention (C, Flate_End_Function); + + type Flate_Type is record + Step : Flate_Step_Function; + Done : Flate_End_Function; + end record; + + subtype Footer_Array is Stream_Element_Array (1 .. 8); + + Simple_GZip_Header : constant Stream_Element_Array (1 .. 10) + := (16#1f#, 16#8b#, -- Magic header + 16#08#, -- Z_DEFLATED + 16#00#, -- Flags + 16#00#, 16#00#, 16#00#, 16#00#, -- Time + 16#00#, -- XFlags + 16#03# -- OS code + ); + -- The simplest gzip header is not for informational, but just for + -- gzip format compatibility. + -- Note that some code below is using assumption + -- Simple_GZip_Header'Last > Footer_Array'Last, so do not make + -- Simple_GZip_Header'Last <= Footer_Array'Last. 
+ + Return_Code : constant array (Thin.Int range <>) of Return_Code_Enum + := (0 => OK, + 1 => STREAM_END, + 2 => NEED_DICT, + -1 => ERRNO, + -2 => STREAM_ERROR, + -3 => DATA_ERROR, + -4 => MEM_ERROR, + -5 => BUF_ERROR, + -6 => VERSION_ERROR); + + Flate : constant array (Boolean) of Flate_Type + := (True => (Step => Thin.Deflate'Access, + Done => Thin.DeflateEnd'Access), + False => (Step => Thin.Inflate'Access, + Done => Thin.InflateEnd'Access)); + + Flush_Finish : constant array (Boolean) of Flush_Mode + := (True => Finish, False => No_Flush); + + procedure Raise_Error (Stream : in Z_Stream); + pragma Inline (Raise_Error); + + procedure Raise_Error (Message : in String); + pragma Inline (Raise_Error); + + procedure Check_Error (Stream : in Z_Stream; Code : in Thin.Int); + + procedure Free is new Ada.Unchecked_Deallocation + (Z_Stream, Z_Stream_Access); + + function To_Thin_Access is new Ada.Unchecked_Conversion + (Z_Stream_Access, Thin.Z_Streamp); + + procedure Translate_GZip + (Filter : in out Filter_Type; + In_Data : in Ada.Streams.Stream_Element_Array; + In_Last : out Ada.Streams.Stream_Element_Offset; + Out_Data : out Ada.Streams.Stream_Element_Array; + Out_Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode); + -- Separate translate routine for make gzip header. + + procedure Translate_Auto + (Filter : in out Filter_Type; + In_Data : in Ada.Streams.Stream_Element_Array; + In_Last : out Ada.Streams.Stream_Element_Offset; + Out_Data : out Ada.Streams.Stream_Element_Array; + Out_Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode); + -- translate routine without additional headers. 
+ + ----------------- + -- Check_Error -- + ----------------- + + procedure Check_Error (Stream : in Z_Stream; Code : in Thin.Int) is + use type Thin.Int; + begin + if Code /= Thin.Z_OK then + Raise_Error + (Return_Code_Enum'Image (Return_Code (Code)) + & ": " & Last_Error_Message (Stream)); + end if; + end Check_Error; + + ----------- + -- Close -- + ----------- + + procedure Close + (Filter : in out Filter_Type; + Ignore_Error : in Boolean := False) + is + Code : Thin.Int; + begin + if not Ignore_Error and then not Is_Open (Filter) then + raise Status_Error; + end if; + + Code := Flate (Filter.Compression).Done (To_Thin_Access (Filter.Strm)); + + if Ignore_Error or else Code = Thin.Z_OK then + Free (Filter.Strm); + else + declare + Error_Message : constant String + := Last_Error_Message (Filter.Strm.all); + begin + Free (Filter.Strm); + Ada.Exceptions.Raise_Exception + (ZLib_Error'Identity, + Return_Code_Enum'Image (Return_Code (Code)) + & ": " & Error_Message); + end; + end if; + end Close; + + ----------- + -- CRC32 -- + ----------- + + function CRC32 + (CRC : in Unsigned_32; + Data : in Ada.Streams.Stream_Element_Array) + return Unsigned_32 + is + use Thin; + begin + return Unsigned_32 (crc32 (ULong (CRC), + Data'Address, + Data'Length)); + end CRC32; + + procedure CRC32 + (CRC : in out Unsigned_32; + Data : in Ada.Streams.Stream_Element_Array) is + begin + CRC := CRC32 (CRC, Data); + end CRC32; + + ------------------ + -- Deflate_Init -- + ------------------ + + procedure Deflate_Init + (Filter : in out Filter_Type; + Level : in Compression_Level := Default_Compression; + Strategy : in Strategy_Type := Default_Strategy; + Method : in Compression_Method := Deflated; + Window_Bits : in Window_Bits_Type := Default_Window_Bits; + Memory_Level : in Memory_Level_Type := Default_Memory_Level; + Header : in Header_Type := Default) + is + use type Thin.Int; + Win_Bits : Thin.Int := Thin.Int (Window_Bits); + begin + if Is_Open (Filter) then + raise Status_Error; + end 
if; + + -- We allow ZLib to make header only in case of default header type. + -- Otherwise we would either do header by ourselfs, or do not do + -- header at all. + + if Header = None or else Header = GZip then + Win_Bits := -Win_Bits; + end if; + + -- For the GZip CRC calculation and make headers. + + if Header = GZip then + Filter.CRC := 0; + Filter.Offset := Simple_GZip_Header'First; + else + Filter.Offset := Simple_GZip_Header'Last + 1; + end if; + + Filter.Strm := new Z_Stream; + Filter.Compression := True; + Filter.Stream_End := False; + Filter.Header := Header; + + if Thin.Deflate_Init + (To_Thin_Access (Filter.Strm), + Level => Thin.Int (Level), + method => Thin.Int (Method), + windowBits => Win_Bits, + memLevel => Thin.Int (Memory_Level), + strategy => Thin.Int (Strategy)) /= Thin.Z_OK + then + Raise_Error (Filter.Strm.all); + end if; + end Deflate_Init; + + ----------- + -- Flush -- + ----------- + + procedure Flush + (Filter : in out Filter_Type; + Out_Data : out Ada.Streams.Stream_Element_Array; + Out_Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode) + is + No_Data : Stream_Element_Array := (1 .. 0 => 0); + Last : Stream_Element_Offset; + begin + Translate (Filter, No_Data, Last, Out_Data, Out_Last, Flush); + end Flush; + + ----------------------- + -- Generic_Translate -- + ----------------------- + + procedure Generic_Translate + (Filter : in out ZLib.Filter_Type; + In_Buffer_Size : in Integer := Default_Buffer_Size; + Out_Buffer_Size : in Integer := Default_Buffer_Size) + is + In_Buffer : Stream_Element_Array + (1 .. Stream_Element_Offset (In_Buffer_Size)); + Out_Buffer : Stream_Element_Array + (1 .. 
Stream_Element_Offset (Out_Buffer_Size)); + Last : Stream_Element_Offset; + In_Last : Stream_Element_Offset; + In_First : Stream_Element_Offset; + Out_Last : Stream_Element_Offset; + begin + Main : loop + Data_In (In_Buffer, Last); + + In_First := In_Buffer'First; + + loop + Translate + (Filter => Filter, + In_Data => In_Buffer (In_First .. Last), + In_Last => In_Last, + Out_Data => Out_Buffer, + Out_Last => Out_Last, + Flush => Flush_Finish (Last < In_Buffer'First)); + + if Out_Buffer'First <= Out_Last then + Data_Out (Out_Buffer (Out_Buffer'First .. Out_Last)); + end if; + + exit Main when Stream_End (Filter); + + -- The end of in buffer. + + exit when In_Last = Last; + + In_First := In_Last + 1; + end loop; + end loop Main; + + end Generic_Translate; + + ------------------ + -- Inflate_Init -- + ------------------ + + procedure Inflate_Init + (Filter : in out Filter_Type; + Window_Bits : in Window_Bits_Type := Default_Window_Bits; + Header : in Header_Type := Default) + is + use type Thin.Int; + Win_Bits : Thin.Int := Thin.Int (Window_Bits); + + procedure Check_Version; + -- Check the latest header types compatibility. + + procedure Check_Version is + begin + if Version <= "1.1.4" then + Raise_Error + ("Inflate header type " & Header_Type'Image (Header) + & " incompatible with ZLib version " & Version); + end if; + end Check_Version; + + begin + if Is_Open (Filter) then + raise Status_Error; + end if; + + case Header is + when None => + Check_Version; + + -- Inflate data without headers determined + -- by negative Win_Bits. + + Win_Bits := -Win_Bits; + when GZip => + Check_Version; + + -- Inflate gzip data defined by flag 16. + + Win_Bits := Win_Bits + 16; + when Auto => + Check_Version; + + -- Inflate with automatic detection + -- of gzip or native header defined by flag 32. 
+ + Win_Bits := Win_Bits + 32; + when Default => null; + end case; + + Filter.Strm := new Z_Stream; + Filter.Compression := False; + Filter.Stream_End := False; + Filter.Header := Header; + + if Thin.Inflate_Init + (To_Thin_Access (Filter.Strm), Win_Bits) /= Thin.Z_OK + then + Raise_Error (Filter.Strm.all); + end if; + end Inflate_Init; + + ------------- + -- Is_Open -- + ------------- + + function Is_Open (Filter : in Filter_Type) return Boolean is + begin + return Filter.Strm /= null; + end Is_Open; + + ----------------- + -- Raise_Error -- + ----------------- + + procedure Raise_Error (Message : in String) is + begin + Ada.Exceptions.Raise_Exception (ZLib_Error'Identity, Message); + end Raise_Error; + + procedure Raise_Error (Stream : in Z_Stream) is + begin + Raise_Error (Last_Error_Message (Stream)); + end Raise_Error; + + ---------- + -- Read -- + ---------- + + procedure Read + (Filter : in out Filter_Type; + Item : out Ada.Streams.Stream_Element_Array; + Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode := No_Flush) + is + In_Last : Stream_Element_Offset; + Item_First : Ada.Streams.Stream_Element_Offset := Item'First; + V_Flush : Flush_Mode := Flush; + + begin + pragma Assert (Rest_First in Buffer'First .. Buffer'Last + 1); + pragma Assert (Rest_Last in Buffer'First - 1 .. Buffer'Last); + + loop + if Rest_Last = Buffer'First - 1 then + V_Flush := Finish; + + elsif Rest_First > Rest_Last then + Read (Buffer, Rest_Last); + Rest_First := Buffer'First; + + if Rest_Last < Buffer'First then + V_Flush := Finish; + end if; + end if; + + Translate + (Filter => Filter, + In_Data => Buffer (Rest_First .. Rest_Last), + In_Last => In_Last, + Out_Data => Item (Item_First .. 
Item'Last), + Out_Last => Last, + Flush => V_Flush); + + Rest_First := In_Last + 1; + + exit when Stream_End (Filter) + or else Last = Item'Last + or else (Last >= Item'First and then Allow_Read_Some); + + Item_First := Last + 1; + end loop; + end Read; + + ---------------- + -- Stream_End -- + ---------------- + + function Stream_End (Filter : in Filter_Type) return Boolean is + begin + if Filter.Header = GZip and Filter.Compression then + return Filter.Stream_End + and then Filter.Offset = Footer_Array'Last + 1; + else + return Filter.Stream_End; + end if; + end Stream_End; + + -------------- + -- Total_In -- + -------------- + + function Total_In (Filter : in Filter_Type) return Count is + begin + return Count (Thin.Total_In (To_Thin_Access (Filter.Strm).all)); + end Total_In; + + --------------- + -- Total_Out -- + --------------- + + function Total_Out (Filter : in Filter_Type) return Count is + begin + return Count (Thin.Total_Out (To_Thin_Access (Filter.Strm).all)); + end Total_Out; + + --------------- + -- Translate -- + --------------- + + procedure Translate + (Filter : in out Filter_Type; + In_Data : in Ada.Streams.Stream_Element_Array; + In_Last : out Ada.Streams.Stream_Element_Offset; + Out_Data : out Ada.Streams.Stream_Element_Array; + Out_Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode) is + begin + if Filter.Header = GZip and then Filter.Compression then + Translate_GZip + (Filter => Filter, + In_Data => In_Data, + In_Last => In_Last, + Out_Data => Out_Data, + Out_Last => Out_Last, + Flush => Flush); + else + Translate_Auto + (Filter => Filter, + In_Data => In_Data, + In_Last => In_Last, + Out_Data => Out_Data, + Out_Last => Out_Last, + Flush => Flush); + end if; + end Translate; + + -------------------- + -- Translate_Auto -- + -------------------- + + procedure Translate_Auto + (Filter : in out Filter_Type; + In_Data : in Ada.Streams.Stream_Element_Array; + In_Last : out Ada.Streams.Stream_Element_Offset; + Out_Data : out 
Ada.Streams.Stream_Element_Array; + Out_Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode) + is + use type Thin.Int; + Code : Thin.Int; + + begin + if not Is_Open (Filter) then + raise Status_Error; + end if; + + if Out_Data'Length = 0 and then In_Data'Length = 0 then + raise Constraint_Error; + end if; + + Set_Out (Filter.Strm.all, Out_Data'Address, Out_Data'Length); + Set_In (Filter.Strm.all, In_Data'Address, In_Data'Length); + + Code := Flate (Filter.Compression).Step + (To_Thin_Access (Filter.Strm), + Thin.Int (Flush)); + + if Code = Thin.Z_STREAM_END then + Filter.Stream_End := True; + else + Check_Error (Filter.Strm.all, Code); + end if; + + In_Last := In_Data'Last + - Stream_Element_Offset (Avail_In (Filter.Strm.all)); + Out_Last := Out_Data'Last + - Stream_Element_Offset (Avail_Out (Filter.Strm.all)); + end Translate_Auto; + + -------------------- + -- Translate_GZip -- + -------------------- + + procedure Translate_GZip + (Filter : in out Filter_Type; + In_Data : in Ada.Streams.Stream_Element_Array; + In_Last : out Ada.Streams.Stream_Element_Offset; + Out_Data : out Ada.Streams.Stream_Element_Array; + Out_Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode) + is + Out_First : Stream_Element_Offset; + + procedure Add_Data (Data : in Stream_Element_Array); + -- Add data to stream from the Filter.Offset till necessary, + -- used for add gzip headr/footer. 
+ + procedure Put_32 + (Item : in out Stream_Element_Array; + Data : in Unsigned_32); + pragma Inline (Put_32); + + -------------- + -- Add_Data -- + -------------- + + procedure Add_Data (Data : in Stream_Element_Array) is + Data_First : Stream_Element_Offset renames Filter.Offset; + Data_Last : Stream_Element_Offset; + Data_Len : Stream_Element_Offset; -- -1 + Out_Len : Stream_Element_Offset; -- -1 + begin + Out_First := Out_Last + 1; + + if Data_First > Data'Last then + return; + end if; + + Data_Len := Data'Last - Data_First; + Out_Len := Out_Data'Last - Out_First; + + if Data_Len <= Out_Len then + Out_Last := Out_First + Data_Len; + Data_Last := Data'Last; + else + Out_Last := Out_Data'Last; + Data_Last := Data_First + Out_Len; + end if; + + Out_Data (Out_First .. Out_Last) := Data (Data_First .. Data_Last); + + Data_First := Data_Last + 1; + Out_First := Out_Last + 1; + end Add_Data; + + ------------ + -- Put_32 -- + ------------ + + procedure Put_32 + (Item : in out Stream_Element_Array; + Data : in Unsigned_32) + is + D : Unsigned_32 := Data; + begin + for J in Item'First .. Item'First + 3 loop + Item (J) := Stream_Element (D and 16#FF#); + D := Shift_Right (D, 8); + end loop; + end Put_32; + + begin + Out_Last := Out_Data'First - 1; + + if not Filter.Stream_End then + Add_Data (Simple_GZip_Header); + + Translate_Auto + (Filter => Filter, + In_Data => In_Data, + In_Last => In_Last, + Out_Data => Out_Data (Out_First .. Out_Data'Last), + Out_Last => Out_Last, + Flush => Flush); + + CRC32 (Filter.CRC, In_Data (In_Data'First .. In_Last)); + end if; + + if Filter.Stream_End and then Out_Last <= Out_Data'Last then + -- This detection method would work only when + -- Simple_GZip_Header'Last > Footer_Array'Last + + if Filter.Offset = Simple_GZip_Header'Last + 1 then + Filter.Offset := Footer_Array'First; + end if; + + declare + Footer : Footer_Array; + begin + Put_32 (Footer, Filter.CRC); + Put_32 (Footer (Footer'First + 4 .. 
Footer'Last), + Unsigned_32 (Total_In (Filter))); + Add_Data (Footer); + end; + end if; + end Translate_GZip; + + ------------- + -- Version -- + ------------- + + function Version return String is + begin + return Interfaces.C.Strings.Value (Thin.zlibVersion); + end Version; + + ----------- + -- Write -- + ----------- + + procedure Write + (Filter : in out Filter_Type; + Item : in Ada.Streams.Stream_Element_Array; + Flush : in Flush_Mode := No_Flush) + is + Buffer : Stream_Element_Array (1 .. Buffer_Size); + In_Last : Stream_Element_Offset; + Out_Last : Stream_Element_Offset; + In_First : Stream_Element_Offset := Item'First; + begin + if Item'Length = 0 and Flush = No_Flush then + return; + end if; + + loop + Translate + (Filter => Filter, + In_Data => Item (In_First .. Item'Last), + In_Last => In_Last, + Out_Data => Buffer, + Out_Last => Out_Last, + Flush => Flush); + + if Out_Last >= Buffer'First then + Write (Buffer (1 .. Out_Last)); + end if; + + exit when In_Last = Item'Last or Stream_End (Filter); + + In_First := In_Last + 1; + end loop; + end Write; + +end ZLib; Added: external/zlib/contrib/ada/zlib.ads ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/zlib.ads Tue Jan 3 07:42:59 2006 @@ -0,0 +1,328 @@ +------------------------------------------------------------------------------ +-- ZLib for Ada thick binding. -- +-- -- +-- Copyright (C) 2002-2004 Dmitriy Anisimkov -- +-- -- +-- This library is free software; you can redistribute it and/or modify -- +-- it under the terms of the GNU General Public License as published by -- +-- the Free Software Foundation; either version 2 of the License, or (at -- +-- your option) any later version. -- +-- -- +-- This library is distributed in the hope that it will be useful, but -- +-- WITHOUT ANY WARRANTY; without even the implied warranty of -- +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU -- +-- General Public License for more details. -- +-- -- +-- You should have received a copy of the GNU General Public License -- +-- along with this library; if not, write to the Free Software Foundation, -- +-- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -- +-- -- +-- As a special exception, if other files instantiate generics from this -- +-- unit, or you link this unit with other files to produce an executable, -- +-- this unit does not by itself cause the resulting executable to be -- +-- covered by the GNU General Public License. This exception does not -- +-- however invalidate any other reasons why the executable file might be -- +-- covered by the GNU Public License. -- +------------------------------------------------------------------------------ + +-- $Id: zlib.ads,v 1.26 2004/09/06 06:53:19 vagul Exp $ + +with Ada.Streams; + +with Interfaces; + +package ZLib is + + ZLib_Error : exception; + Status_Error : exception; + + type Compression_Level is new Integer range -1 .. 9; + + type Flush_Mode is private; + + type Compression_Method is private; + + type Window_Bits_Type is new Integer range 8 .. 15; + + type Memory_Level_Type is new Integer range 1 .. 9; + + type Unsigned_32 is new Interfaces.Unsigned_32; + + type Strategy_Type is private; + + type Header_Type is (None, Auto, Default, GZip); + -- Header type usage have a some limitation for inflate. + -- See comment for Inflate_Init. 
+ + subtype Count is Ada.Streams.Stream_Element_Count; + + Default_Memory_Level : constant Memory_Level_Type := 8; + Default_Window_Bits : constant Window_Bits_Type := 15; + + ---------------------------------- + -- Compression method constants -- + ---------------------------------- + + Deflated : constant Compression_Method; + -- Only one method allowed in this ZLib version + + --------------------------------- + -- Compression level constants -- + --------------------------------- + + No_Compression : constant Compression_Level := 0; + Best_Speed : constant Compression_Level := 1; + Best_Compression : constant Compression_Level := 9; + Default_Compression : constant Compression_Level := -1; + + -------------------------- + -- Flush mode constants -- + -------------------------- + + No_Flush : constant Flush_Mode; + -- Regular way for compression, no flush + + Partial_Flush : constant Flush_Mode; + -- Will be removed, use Z_SYNC_FLUSH instead + + Sync_Flush : constant Flush_Mode; + -- All pending output is flushed to the output buffer and the output + -- is aligned on a byte boundary, so that the decompressor can get all + -- input data available so far. (In particular avail_in is zero after the + -- call if enough output space has been provided before the call.) + -- Flushing may degrade compression for some compression algorithms and so + -- it should be used only when necessary. + + Block_Flush : constant Flush_Mode; + -- Z_BLOCK requests that inflate() stop + -- if and when it get to the next deflate block boundary. When decoding the + -- zlib or gzip format, this will cause inflate() to return immediately + -- after the header and before the first block. When doing a raw inflate, + -- inflate() will go ahead and process the first block, and will return + -- when it gets to the end of that block, or when it runs out of data. 
+ + Full_Flush : constant Flush_Mode; + -- All output is flushed as with SYNC_FLUSH, and the compression state + -- is reset so that decompression can restart from this point if previous + -- compressed data has been damaged or if random access is desired. Using + -- Full_Flush too often can seriously degrade the compression. + + Finish : constant Flush_Mode; + -- Tells the compressor that the input data is complete. + + ------------------------------------ + -- Compression strategy constants -- + ------------------------------------ + + -- The RLE strategy can be used only in zlib version 1.2.0 and later. + + Filtered : constant Strategy_Type; + Huffman_Only : constant Strategy_Type; + RLE : constant Strategy_Type; + Default_Strategy : constant Strategy_Type; + + Default_Buffer_Size : constant := 4096; + + type Filter_Type is tagged limited private; + -- The filter is for compression and for decompression. + -- How the type is used depends on its initialization. + + function Version return String; + pragma Inline (Version); + -- Return string representation of the ZLib version. + + procedure Deflate_Init + (Filter : in out Filter_Type; + Level : in Compression_Level := Default_Compression; + Strategy : in Strategy_Type := Default_Strategy; + Method : in Compression_Method := Deflated; + Window_Bits : in Window_Bits_Type := Default_Window_Bits; + Memory_Level : in Memory_Level_Type := Default_Memory_Level; + Header : in Header_Type := Default); + -- Compressor initialization. + -- When Header parameter is Auto or Default, then default zlib header + -- would be provided for compressed data. + -- When Header is GZip, then gzip header would be set instead of + -- default header. + -- When Header is None, no header would be set for compressed data. + + procedure Inflate_Init + (Filter : in out Filter_Type; + Window_Bits : in Window_Bits_Type := Default_Window_Bits; + Header : in Header_Type := Default); + -- Decompressor initialization.
+ -- Default header type mean that ZLib default header is expecting in the + -- input compressed stream. + -- Header type None mean that no header is expecting in the input stream. + -- GZip header type mean that GZip header is expecting in the + -- input compressed stream. + -- Auto header type mean that header type (GZip or Native) would be + -- detected automatically in the input stream. + -- Note that header types parameter values None, GZip and Auto are + -- supported for inflate routine only in ZLib versions 1.2.0.2 and later. + -- Deflate_Init is supporting all header types. + + function Is_Open (Filter : in Filter_Type) return Boolean; + pragma Inline (Is_Open); + -- Is the filter opened for compression or decompression. + + procedure Close + (Filter : in out Filter_Type; + Ignore_Error : in Boolean := False); + -- Closing the compression or decompressor. + -- If stream is closing before the complete and Ignore_Error is False, + -- The exception would be raised. + + generic + with procedure Data_In + (Item : out Ada.Streams.Stream_Element_Array; + Last : out Ada.Streams.Stream_Element_Offset); + with procedure Data_Out + (Item : in Ada.Streams.Stream_Element_Array); + procedure Generic_Translate + (Filter : in out Filter_Type; + In_Buffer_Size : in Integer := Default_Buffer_Size; + Out_Buffer_Size : in Integer := Default_Buffer_Size); + -- Compress/decompress data fetch from Data_In routine and pass the result + -- to the Data_Out routine. User should provide Data_In and Data_Out + -- for compression/decompression data flow. + -- Compression or decompression depend on Filter initialization. 
+ + function Total_In (Filter : in Filter_Type) return Count; + pragma Inline (Total_In); + -- Returns total number of input bytes read so far + + function Total_Out (Filter : in Filter_Type) return Count; + pragma Inline (Total_Out); + -- Returns total number of bytes output so far + + function CRC32 + (CRC : in Unsigned_32; + Data : in Ada.Streams.Stream_Element_Array) + return Unsigned_32; + pragma Inline (CRC32); + -- Compute CRC32, it could be necessary for make gzip format + + procedure CRC32 + (CRC : in out Unsigned_32; + Data : in Ada.Streams.Stream_Element_Array); + pragma Inline (CRC32); + -- Compute CRC32, it could be necessary for make gzip format + + ------------------------------------------------- + -- Below is more complex low level routines. -- + ------------------------------------------------- + + procedure Translate + (Filter : in out Filter_Type; + In_Data : in Ada.Streams.Stream_Element_Array; + In_Last : out Ada.Streams.Stream_Element_Offset; + Out_Data : out Ada.Streams.Stream_Element_Array; + Out_Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode); + -- Compress/decompress the In_Data buffer and place the result into + -- Out_Data. In_Last is the index of last element from In_Data accepted by + -- the Filter. Out_Last is the last element of the received data from + -- Filter. To tell the filter that incoming data are complete put the + -- Flush parameter to Finish. + + function Stream_End (Filter : in Filter_Type) return Boolean; + pragma Inline (Stream_End); + -- Return the true when the stream is complete. + + procedure Flush + (Filter : in out Filter_Type; + Out_Data : out Ada.Streams.Stream_Element_Array; + Out_Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode); + pragma Inline (Flush); + -- Flushing the data from the compressor. + + generic + with procedure Write + (Item : in Ada.Streams.Stream_Element_Array); + -- User should provide this routine for accept + -- compressed/decompressed data. 
+ + Buffer_Size : in Ada.Streams.Stream_Element_Offset + := Default_Buffer_Size; + -- Buffer size for Write user routine. + + procedure Write + (Filter : in out Filter_Type; + Item : in Ada.Streams.Stream_Element_Array; + Flush : in Flush_Mode := No_Flush); + -- Compress/Decompress data from Item to the generic parameter procedure + -- Write. Output buffer size could be set in Buffer_Size generic parameter. + + generic + with procedure Read + (Item : out Ada.Streams.Stream_Element_Array; + Last : out Ada.Streams.Stream_Element_Offset); + -- User should provide data for compression/decompression + -- through this routine. + + Buffer : in out Ada.Streams.Stream_Element_Array; + -- Buffer for keeping the remaining data from the previous + -- read. + + Rest_First, Rest_Last : in out Ada.Streams.Stream_Element_Offset; + -- Rest_First has to be initialized to Buffer'Last + 1 + -- Rest_Last has to be initialized to Buffer'Last + -- before usage. + + Allow_Read_Some : in Boolean := False; + -- Is it allowed to return Last < Item'Last before end of data. + + procedure Read + (Filter : in out Filter_Type; + Item : out Ada.Streams.Stream_Element_Array; + Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode := No_Flush); + -- Compress/Decompress data from generic parameter procedure Read to the + -- Item. User should provide Buffer and initialized Rest_First, Rest_Last + -- indicators. If Allow_Read_Some is False, Read returns + -- Last < Item'Last only at end of stream. + +private + + use Ada.Streams; + + pragma Assert (Ada.Streams.Stream_Element'Size = 8); + pragma Assert (Ada.Streams.Stream_Element'Modulus = 2**8); + + type Flush_Mode is new Integer range 0 .. 5; + + type Compression_Method is new Integer range 8 .. 8; + + type Strategy_Type is new Integer range 0 ..
3; + + No_Flush : constant Flush_Mode := 0; + Partial_Flush : constant Flush_Mode := 1; + Sync_Flush : constant Flush_Mode := 2; + Full_Flush : constant Flush_Mode := 3; + Finish : constant Flush_Mode := 4; + Block_Flush : constant Flush_Mode := 5; + + Filtered : constant Strategy_Type := 1; + Huffman_Only : constant Strategy_Type := 2; + RLE : constant Strategy_Type := 3; + Default_Strategy : constant Strategy_Type := 0; + + Deflated : constant Compression_Method := 8; + + type Z_Stream; + + type Z_Stream_Access is access all Z_Stream; + + type Filter_Type is tagged limited record + Strm : Z_Stream_Access; + Compression : Boolean; + Stream_End : Boolean; + Header : Header_Type; + CRC : Unsigned_32; + Offset : Stream_Element_Offset; + -- Offset for gzip header/footer output. + end record; + +end ZLib; Added: external/zlib/contrib/ada/zlib.gpr ============================================================================== --- (empty file) +++ external/zlib/contrib/ada/zlib.gpr Tue Jan 3 07:42:59 2006 @@ -0,0 +1,20 @@ +project Zlib is + + for Languages use ("Ada"); + for Source_Dirs use ("."); + for Object_Dir use "."; + for Main use ("test.adb", "mtest.adb", "read.adb", "buffer_demo"); + + package Compiler is + for Default_Switches ("ada") use ("-gnatwcfilopru", "-gnatVcdfimorst", "-gnatyabcefhiklmnoprst"); + end Compiler; + + package Linker is + for Default_Switches ("ada") use ("-lz"); + end Linker; + + package Builder is + for Default_Switches ("ada") use ("-s", "-gnatQ"); + end Builder; + +end Zlib; Added: external/zlib/contrib/asm586/README.586 ============================================================================== --- (empty file) +++ external/zlib/contrib/asm586/README.586 Tue Jan 3 07:42:59 2006 @@ -0,0 +1,43 @@ +This is a patched version of zlib modified to use +Pentium-optimized assembly code in the deflation algorithm. 
The files +changed/added by this patch are: + +README.586 +match.S + +The effectiveness of these modifications is a bit marginal, as the +program's bottleneck seems to be mostly L1-cache contention, for which +there is no real way to work around without rewriting the basic +algorithm. The speedup on average is around 5-10% (which is generally +less than the amount of variance between subsequent executions). +However, when used at level 9 compression, the cache contention can +drop enough for the assembly version to achieve 10-20% speedup (and +sometimes more, depending on the amount of overall redundancy in the +files). Even here, though, cache contention can still be the limiting +factor, depending on the nature of the program using the zlib library. +This may also mean that better improvements will be seen on a Pentium +with MMX, which suffers much less from L1-cache contention, but I have +not yet verified this. + +Note that this code has been tailored for the Pentium in particular, +and will not perform well on the Pentium Pro (due to the use of a +partial register in the inner loop). + +If you are using an assembler other than GNU as, you will have to +translate match.S to use your assembler's syntax. (Have fun.)
+ +Brian Raiter +breadbox at muppetlabs.com +April, 1998 + + +Added for zlib 1.1.3: + +The patches come from +http://www.muppetlabs.com/~breadbox/software/assembly.html + +To compile zlib with this asm file, copy match.S to the zlib directory +then do: + +CFLAGS="-O3 -DASMV" ./configure +make OBJA=match.o Added: external/zlib/contrib/asm586/match.S ============================================================================== --- (empty file) +++ external/zlib/contrib/asm586/match.S Tue Jan 3 07:42:59 2006 @@ -0,0 +1,364 @@ +/* match.s -- Pentium-optimized version of longest_match() + * Written for zlib 1.1.2 + * Copyright (C) 1998 Brian Raiter + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License. + */ + +#ifndef NO_UNDERLINE +#define match_init _match_init +#define longest_match _longest_match +#endif + +#define MAX_MATCH (258) +#define MIN_MATCH (3) +#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) +#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7) + +/* stack frame offsets */ + +#define wmask 0 /* local copy of s->wmask */ +#define window 4 /* local copy of s->window */ +#define windowbestlen 8 /* s->window + bestlen */ +#define chainlenscanend 12 /* high word: current chain len */ + /* low word: last bytes sought */ +#define scanstart 16 /* first two bytes of string */ +#define scanalign 20 /* dword-misalignment of string */ +#define nicematch 24 /* a good enough match size */ +#define bestlen 28 /* size of best match so far */ +#define scan 32 /* ptr to string wanting match */ + +#define LocalVarsSize (36) +/* saved ebx 36 */ +/* saved edi 40 */ +/* saved esi 44 */ +/* saved ebp 48 */ +/* return address 52 */ +#define deflatestate 56 /* the function arguments */ +#define curmatch 60 + +/* Offsets for fields in the deflate_state structure. These numbers + * are calculated from the definition of deflate_state, with the + * assumption that the compiler will dword-align the fields. 
(Thus, + * changing the definition of deflate_state could easily cause this + * program to crash horribly, without so much as a warning at + * compile time. Sigh.) + */ + +/* All the +zlib1222add offsets are due to the addition of fields + * in zlib in the deflate_state structure since the asm code was first written + * (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). + * (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). + * if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). + */ + +#define zlib1222add (8) + +#define dsWSize (36+zlib1222add) +#define dsWMask (44+zlib1222add) +#define dsWindow (48+zlib1222add) +#define dsPrev (56+zlib1222add) +#define dsMatchLen (88+zlib1222add) +#define dsPrevMatch (92+zlib1222add) +#define dsStrStart (100+zlib1222add) +#define dsMatchStart (104+zlib1222add) +#define dsLookahead (108+zlib1222add) +#define dsPrevLen (112+zlib1222add) +#define dsMaxChainLen (116+zlib1222add) +#define dsGoodMatch (132+zlib1222add) +#define dsNiceMatch (136+zlib1222add) + + +.file "match.S" + +.globl match_init, longest_match + +.text + +/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */ + +longest_match: + +/* Save registers that the compiler may be using, and adjust %esp to */ +/* make room for our stack frame. */ + + pushl %ebp + pushl %edi + pushl %esi + pushl %ebx + subl $LocalVarsSize, %esp + +/* Retrieve the function arguments. %ecx will hold cur_match */ +/* throughout the entire function. %edx will hold the pointer to the */ +/* deflate_state structure during the function's setup (before */ +/* entering the main loop). 
*/ + + movl deflatestate(%esp), %edx + movl curmatch(%esp), %ecx + +/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */ + + movl dsNiceMatch(%edx), %eax + movl dsLookahead(%edx), %ebx + cmpl %eax, %ebx + jl LookaheadLess + movl %eax, %ebx +LookaheadLess: movl %ebx, nicematch(%esp) + +/* register Bytef *scan = s->window + s->strstart; */ + + movl dsWindow(%edx), %esi + movl %esi, window(%esp) + movl dsStrStart(%edx), %ebp + lea (%esi,%ebp), %edi + movl %edi, scan(%esp) + +/* Determine how many bytes the scan ptr is off from being */ +/* dword-aligned. */ + + movl %edi, %eax + negl %eax + andl $3, %eax + movl %eax, scanalign(%esp) + +/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ +/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ + + movl dsWSize(%edx), %eax + subl $MIN_LOOKAHEAD, %eax + subl %eax, %ebp + jg LimitPositive + xorl %ebp, %ebp +LimitPositive: + +/* unsigned chain_length = s->max_chain_length; */ +/* if (s->prev_length >= s->good_match) { */ +/* chain_length >>= 2; */ +/* } */ + + movl dsPrevLen(%edx), %eax + movl dsGoodMatch(%edx), %ebx + cmpl %ebx, %eax + movl dsMaxChainLen(%edx), %ebx + jl LastMatchGood + shrl $2, %ebx +LastMatchGood: + +/* chainlen is decremented once beforehand so that the function can */ +/* use the sign flag instead of the zero flag for the exit test. */ +/* It is then shifted into the high word, to make room for the scanend */ +/* scanend value, which it will always accompany. */ + + decl %ebx + shll $16, %ebx + +/* int best_len = s->prev_length; */ + + movl dsPrevLen(%edx), %eax + movl %eax, bestlen(%esp) + +/* Store the sum of s->window + best_len in %esi locally, and in %esi. 
*/ + + addl %eax, %esi + movl %esi, windowbestlen(%esp) + +/* register ush scan_start = *(ushf*)scan; */ +/* register ush scan_end = *(ushf*)(scan+best_len-1); */ + + movw (%edi), %bx + movw %bx, scanstart(%esp) + movw -1(%edi,%eax), %bx + movl %ebx, chainlenscanend(%esp) + +/* Posf *prev = s->prev; */ +/* uInt wmask = s->w_mask; */ + + movl dsPrev(%edx), %edi + movl dsWMask(%edx), %edx + mov %edx, wmask(%esp) + +/* Jump into the main loop. */ + + jmp LoopEntry + +.balign 16 + +/* do { + * match = s->window + cur_match; + * if (*(ushf*)(match+best_len-1) != scan_end || + * *(ushf*)match != scan_start) continue; + * [...] + * } while ((cur_match = prev[cur_match & wmask]) > limit + * && --chain_length != 0); + * + * Here is the inner loop of the function. The function will spend the + * majority of its time in this loop, and majority of that time will + * be spent in the first ten instructions. + * + * Within this loop: + * %ebx = chainlenscanend - i.e., ((chainlen << 16) | scanend) + * %ecx = curmatch + * %edx = curmatch & wmask + * %esi = windowbestlen - i.e., (window + bestlen) + * %edi = prev + * %ebp = limit + * + * Two optimization notes on the choice of instructions: + * + * The first instruction uses a 16-bit address, which costs an extra, + * unpairable cycle. This is cheaper than doing a 32-bit access and + * zeroing the high word, due to the 3-cycle misalignment penalty which + * would occur half the time. This also turns out to be cheaper than + * doing two separate 8-bit accesses, as the memory is so rarely in the + * L1 cache. + * + * The window buffer, however, apparently spends a lot of time in the + * cache, and so it is faster to retrieve the word at the end of the + * match string with two 8-bit loads. 
The instructions that test the + * word at the beginning of the match string, however, are executed + * much less frequently, and there it was cheaper to use 16-bit + * instructions, which avoided the necessity of saving off and + * subsequently reloading one of the other registers. + */ +LookupLoop: + /* 1 U & V */ + movw (%edi,%edx,2), %cx /* 2 U pipe */ + movl wmask(%esp), %edx /* 2 V pipe */ + cmpl %ebp, %ecx /* 3 U pipe */ + jbe LeaveNow /* 3 V pipe */ + subl $0x00010000, %ebx /* 4 U pipe */ + js LeaveNow /* 4 V pipe */ +LoopEntry: movb -1(%esi,%ecx), %al /* 5 U pipe */ + andl %ecx, %edx /* 5 V pipe */ + cmpb %bl, %al /* 6 U pipe */ + jnz LookupLoop /* 6 V pipe */ + movb (%esi,%ecx), %ah + cmpb %bh, %ah + jnz LookupLoop + movl window(%esp), %eax + movw (%eax,%ecx), %ax + cmpw scanstart(%esp), %ax + jnz LookupLoop + +/* Store the current value of chainlen. */ + + movl %ebx, chainlenscanend(%esp) + +/* Point %edi to the string under scrutiny, and %esi to the string we */ +/* are hoping to match it up with. In actuality, %esi and %edi are */ +/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ +/* initialized to -(MAX_MATCH_8 - scanalign). */ + + movl window(%esp), %esi + movl scan(%esp), %edi + addl %ecx, %esi + movl scanalign(%esp), %eax + movl $(-MAX_MATCH_8), %edx + lea MAX_MATCH_8(%edi,%eax), %edi + lea MAX_MATCH_8(%esi,%eax), %esi + +/* Test the strings for equality, 8 bytes at a time. At the end, + * adjust %edx so that it is offset to the exact byte that mismatched. + * + * We already know at this point that the first three bytes of the + * strings match each other, and they can be safely passed over before + * starting the compare loop. So what this code does is skip over 0-3 + * bytes, as much as necessary in order to dword-align the %edi + * pointer. (%esi will still be misaligned three times out of four.) + * + * It should be confessed that this loop usually does not represent + * much of the total running time. 
Replacing it with a more + * straightforward "rep cmpsb" would not drastically degrade + * performance. + */ +LoopCmps: + movl (%esi,%edx), %eax + movl (%edi,%edx), %ebx + xorl %ebx, %eax + jnz LeaveLoopCmps + movl 4(%esi,%edx), %eax + movl 4(%edi,%edx), %ebx + xorl %ebx, %eax + jnz LeaveLoopCmps4 + addl $8, %edx + jnz LoopCmps + jmp LenMaximum +LeaveLoopCmps4: addl $4, %edx +LeaveLoopCmps: testl $0x0000FFFF, %eax + jnz LenLower + addl $2, %edx + shrl $16, %eax +LenLower: subb $1, %al + adcl $0, %edx + +/* Calculate the length of the match. If it is longer than MAX_MATCH, */ +/* then automatically accept it as the best possible match and leave. */ + + lea (%edi,%edx), %eax + movl scan(%esp), %edi + subl %edi, %eax + cmpl $MAX_MATCH, %eax + jge LenMaximum + +/* If the length of the match is not longer than the best match we */ +/* have so far, then forget it and return to the lookup loop. */ + + movl deflatestate(%esp), %edx + movl bestlen(%esp), %ebx + cmpl %ebx, %eax + jg LongerMatch + movl chainlenscanend(%esp), %ebx + movl windowbestlen(%esp), %esi + movl dsPrev(%edx), %edi + movl wmask(%esp), %edx + andl %ecx, %edx + jmp LookupLoop + +/* s->match_start = cur_match; */ +/* best_len = len; */ +/* if (len >= nice_match) break; */ +/* scan_end = *(ushf*)(scan+best_len-1); */ + +LongerMatch: movl nicematch(%esp), %ebx + movl %eax, bestlen(%esp) + movl %ecx, dsMatchStart(%edx) + cmpl %ebx, %eax + jge LeaveNow + movl window(%esp), %esi + addl %eax, %esi + movl %esi, windowbestlen(%esp) + movl chainlenscanend(%esp), %ebx + movw -1(%edi,%eax), %bx + movl dsPrev(%edx), %edi + movl %ebx, chainlenscanend(%esp) + movl wmask(%esp), %edx + andl %ecx, %edx + jmp LookupLoop + +/* Accept the current string, with the maximum possible length. 
*/ + +LenMaximum: movl deflatestate(%esp), %edx + movl $MAX_MATCH, bestlen(%esp) + movl %ecx, dsMatchStart(%edx) + +/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ +/* return s->lookahead; */ + +LeaveNow: + movl deflatestate(%esp), %edx + movl bestlen(%esp), %ebx + movl dsLookahead(%edx), %eax + cmpl %eax, %ebx + jg LookaheadRet + movl %ebx, %eax +LookaheadRet: + +/* Restore the stack and return from whence we came. */ + + addl $LocalVarsSize, %esp + popl %ebx + popl %esi + popl %edi + popl %ebp +match_init: ret Added: external/zlib/contrib/asm686/README.686 ============================================================================== --- (empty file) +++ external/zlib/contrib/asm686/README.686 Tue Jan 3 07:42:59 2006 @@ -0,0 +1,34 @@ +This is a patched version of zlib, modified to use +Pentium-Pro-optimized assembly code in the deflation algorithm. The +files changed/added by this patch are: + +README.686 +match.S + +The speedup that this patch provides varies, depending on whether the +compiler used to build the original version of zlib falls afoul of the +PPro's speed traps. My own tests show a speedup of around 10-20% at +the default compression level, and 20-30% using -9, against a version +compiled using gcc 2.7.2.3. Your mileage may vary. + +Note that this code has been tailored for the PPro/PII in particular, +and will not perform particuarly well on a Pentium. + +If you are using an assembler other than GNU as, you will have to +translate match.S to use your assembler's syntax. (Have fun.) 
+ +Brian Raiter +breadbox at muppetlabs.com +April, 1998 + + +Added for zlib 1.1.3: + +The patches come from +http://www.muppetlabs.com/~breadbox/software/assembly.html + +To compile zlib with this asm file, copy match.S to the zlib directory +then do: + +CFLAGS="-O3 -DASMV" ./configure +make OBJA=match.o Added: external/zlib/contrib/asm686/match.S ============================================================================== --- (empty file) +++ external/zlib/contrib/asm686/match.S Tue Jan 3 07:42:59 2006 @@ -0,0 +1,329 @@ +/* match.s -- Pentium-Pro-optimized version of longest_match() + * Written for zlib 1.1.2 + * Copyright (C) 1998 Brian Raiter + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License. + */ + +#ifndef NO_UNDERLINE +#define match_init _match_init +#define longest_match _longest_match +#endif + +#define MAX_MATCH (258) +#define MIN_MATCH (3) +#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) +#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7) + +/* stack frame offsets */ + +#define chainlenwmask 0 /* high word: current chain len */ + /* low word: s->wmask */ +#define window 4 /* local copy of s->window */ +#define windowbestlen 8 /* s->window + bestlen */ +#define scanstart 16 /* first two bytes of string */ +#define scanend 12 /* last two bytes of string */ +#define scanalign 20 /* dword-misalignment of string */ +#define nicematch 24 /* a good enough match size */ +#define bestlen 28 /* size of best match so far */ +#define scan 32 /* ptr to string wanting match */ + +#define LocalVarsSize (36) +/* saved ebx 36 */ +/* saved edi 40 */ +/* saved esi 44 */ +/* saved ebp 48 */ +/* return address 52 */ +#define deflatestate 56 /* the function arguments */ +#define curmatch 60 + +/* All the +zlib1222add offsets are due to the addition of fields + * in zlib in the deflate_state structure since the asm code was first written + * (if you compile with zlib 1.0.4 or older, use "zlib1222add equ 
(-4)"). + * (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). + * if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). + */ + +#define zlib1222add (8) + +#define dsWSize (36+zlib1222add) +#define dsWMask (44+zlib1222add) +#define dsWindow (48+zlib1222add) +#define dsPrev (56+zlib1222add) +#define dsMatchLen (88+zlib1222add) +#define dsPrevMatch (92+zlib1222add) +#define dsStrStart (100+zlib1222add) +#define dsMatchStart (104+zlib1222add) +#define dsLookahead (108+zlib1222add) +#define dsPrevLen (112+zlib1222add) +#define dsMaxChainLen (116+zlib1222add) +#define dsGoodMatch (132+zlib1222add) +#define dsNiceMatch (136+zlib1222add) + + +.file "match.S" + +.globl match_init, longest_match + +.text + +/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */ + +longest_match: + +/* Save registers that the compiler may be using, and adjust %esp to */ +/* make room for our stack frame. */ + + pushl %ebp + pushl %edi + pushl %esi + pushl %ebx + subl $LocalVarsSize, %esp + +/* Retrieve the function arguments. %ecx will hold cur_match */ +/* throughout the entire function. %edx will hold the pointer to the */ +/* deflate_state structure during the function's setup (before */ +/* entering the main loop). */ + + movl deflatestate(%esp), %edx + movl curmatch(%esp), %ecx + +/* uInt wmask = s->w_mask; */ +/* unsigned chain_length = s->max_chain_length; */ +/* if (s->prev_length >= s->good_match) { */ +/* chain_length >>= 2; */ +/* } */ + + movl dsPrevLen(%edx), %eax + movl dsGoodMatch(%edx), %ebx + cmpl %ebx, %eax + movl dsWMask(%edx), %eax + movl dsMaxChainLen(%edx), %ebx + jl LastMatchGood + shrl $2, %ebx +LastMatchGood: + +/* chainlen is decremented once beforehand so that the function can */ +/* use the sign flag instead of the zero flag for the exit test. */ +/* It is then shifted into the high word, to make room for the wmask */ +/* value, which it will always accompany. 
*/ + + decl %ebx + shll $16, %ebx + orl %eax, %ebx + movl %ebx, chainlenwmask(%esp) + +/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */ + + movl dsNiceMatch(%edx), %eax + movl dsLookahead(%edx), %ebx + cmpl %eax, %ebx + jl LookaheadLess + movl %eax, %ebx +LookaheadLess: movl %ebx, nicematch(%esp) + +/* register Bytef *scan = s->window + s->strstart; */ + + movl dsWindow(%edx), %esi + movl %esi, window(%esp) + movl dsStrStart(%edx), %ebp + lea (%esi,%ebp), %edi + movl %edi, scan(%esp) + +/* Determine how many bytes the scan ptr is off from being */ +/* dword-aligned. */ + + movl %edi, %eax + negl %eax + andl $3, %eax + movl %eax, scanalign(%esp) + +/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ +/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ + + movl dsWSize(%edx), %eax + subl $MIN_LOOKAHEAD, %eax + subl %eax, %ebp + jg LimitPositive + xorl %ebp, %ebp +LimitPositive: + +/* int best_len = s->prev_length; */ + + movl dsPrevLen(%edx), %eax + movl %eax, bestlen(%esp) + +/* Store the sum of s->window + best_len in %esi locally, and in %esi. */ + + addl %eax, %esi + movl %esi, windowbestlen(%esp) + +/* register ush scan_start = *(ushf*)scan; */ +/* register ush scan_end = *(ushf*)(scan+best_len-1); */ +/* Posf *prev = s->prev; */ + + movzwl (%edi), %ebx + movl %ebx, scanstart(%esp) + movzwl -1(%edi,%eax), %ebx + movl %ebx, scanend(%esp) + movl dsPrev(%edx), %edi + +/* Jump into the main loop. */ + + movl chainlenwmask(%esp), %edx + jmp LoopEntry + +.balign 16 + +/* do { + * match = s->window + cur_match; + * if (*(ushf*)(match+best_len-1) != scan_end || + * *(ushf*)match != scan_start) continue; + * [...] + * } while ((cur_match = prev[cur_match & wmask]) > limit + * && --chain_length != 0); + * + * Here is the inner loop of the function. The function will spend the + * majority of its time in this loop, and majority of that time will + * be spent in the first ten instructions. 
+ * + * Within this loop: + * %ebx = scanend + * %ecx = curmatch + * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) + * %esi = windowbestlen - i.e., (window + bestlen) + * %edi = prev + * %ebp = limit + */ +LookupLoop: + andl %edx, %ecx + movzwl (%edi,%ecx,2), %ecx + cmpl %ebp, %ecx + jbe LeaveNow + subl $0x00010000, %edx + js LeaveNow +LoopEntry: movzwl -1(%esi,%ecx), %eax + cmpl %ebx, %eax + jnz LookupLoop + movl window(%esp), %eax + movzwl (%eax,%ecx), %eax + cmpl scanstart(%esp), %eax + jnz LookupLoop + +/* Store the current value of chainlen. */ + + movl %edx, chainlenwmask(%esp) + +/* Point %edi to the string under scrutiny, and %esi to the string we */ +/* are hoping to match it up with. In actuality, %esi and %edi are */ +/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ +/* initialized to -(MAX_MATCH_8 - scanalign). */ + + movl window(%esp), %esi + movl scan(%esp), %edi + addl %ecx, %esi + movl scanalign(%esp), %eax + movl $(-MAX_MATCH_8), %edx + lea MAX_MATCH_8(%edi,%eax), %edi + lea MAX_MATCH_8(%esi,%eax), %esi + +/* Test the strings for equality, 8 bytes at a time. At the end, + * adjust %edx so that it is offset to the exact byte that mismatched. + * + * We already know at this point that the first three bytes of the + * strings match each other, and they can be safely passed over before + * starting the compare loop. So what this code does is skip over 0-3 + * bytes, as much as necessary in order to dword-align the %edi + * pointer. (%esi will still be misaligned three times out of four.) + * + * It should be confessed that this loop usually does not represent + * much of the total running time. Replacing it with a more + * straightforward "rep cmpsb" would not drastically degrade + * performance. 
+ */ +LoopCmps: + movl (%esi,%edx), %eax + xorl (%edi,%edx), %eax + jnz LeaveLoopCmps + movl 4(%esi,%edx), %eax + xorl 4(%edi,%edx), %eax + jnz LeaveLoopCmps4 + addl $8, %edx + jnz LoopCmps + jmp LenMaximum +LeaveLoopCmps4: addl $4, %edx +LeaveLoopCmps: testl $0x0000FFFF, %eax + jnz LenLower + addl $2, %edx + shrl $16, %eax +LenLower: subb $1, %al + adcl $0, %edx + +/* Calculate the length of the match. If it is longer than MAX_MATCH, */ +/* then automatically accept it as the best possible match and leave. */ + + lea (%edi,%edx), %eax + movl scan(%esp), %edi + subl %edi, %eax + cmpl $MAX_MATCH, %eax + jge LenMaximum + +/* If the length of the match is not longer than the best match we */ +/* have so far, then forget it and return to the lookup loop. */ + + movl deflatestate(%esp), %edx + movl bestlen(%esp), %ebx + cmpl %ebx, %eax + jg LongerMatch + movl windowbestlen(%esp), %esi + movl dsPrev(%edx), %edi + movl scanend(%esp), %ebx + movl chainlenwmask(%esp), %edx + jmp LookupLoop + +/* s->match_start = cur_match; */ +/* best_len = len; */ +/* if (len >= nice_match) break; */ +/* scan_end = *(ushf*)(scan+best_len-1); */ + +LongerMatch: movl nicematch(%esp), %ebx + movl %eax, bestlen(%esp) + movl %ecx, dsMatchStart(%edx) + cmpl %ebx, %eax + jge LeaveNow + movl window(%esp), %esi + addl %eax, %esi + movl %esi, windowbestlen(%esp) + movzwl -1(%edi,%eax), %ebx + movl dsPrev(%edx), %edi + movl %ebx, scanend(%esp) + movl chainlenwmask(%esp), %edx + jmp LookupLoop + +/* Accept the current string, with the maximum possible length. */ + +LenMaximum: movl deflatestate(%esp), %edx + movl $MAX_MATCH, bestlen(%esp) + movl %ecx, dsMatchStart(%edx) + +/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ +/* return s->lookahead; */ + +LeaveNow: + movl deflatestate(%esp), %edx + movl bestlen(%esp), %ebx + movl dsLookahead(%edx), %eax + cmpl %eax, %ebx + jg LookaheadRet + movl %ebx, %eax +LookaheadRet: + +/* Restore the stack and return from whence we came. 
*/ + + addl $LocalVarsSize, %esp + popl %ebx + popl %esi + popl %edi + popl %ebp +match_init: ret Added: external/zlib/contrib/blast/Makefile ============================================================================== --- (empty file) +++ external/zlib/contrib/blast/Makefile Tue Jan 3 07:42:59 2006 @@ -0,0 +1,8 @@ +blast: blast.c blast.h + cc -DTEST -o blast blast.c + +test: blast + blast < test.pk | cmp - test.txt + +clean: + rm -f blast blast.o Added: external/zlib/contrib/blast/README ============================================================================== --- (empty file) +++ external/zlib/contrib/blast/README Tue Jan 3 07:42:59 2006 @@ -0,0 +1,4 @@ +Read blast.h for purpose and usage. + +Mark Adler +madler at alumni.caltech.edu Added: external/zlib/contrib/blast/blast.c ============================================================================== --- (empty file) +++ external/zlib/contrib/blast/blast.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,444 @@ +/* blast.c + * Copyright (C) 2003 Mark Adler + * For conditions of distribution and use, see copyright notice in blast.h + * version 1.1, 16 Feb 2003 + * + * blast.c decompresses data compressed by the PKWare Compression Library. + * This function provides functionality similar to the explode() function of + * the PKWare library, hence the name "blast". + * + * This decompressor is based on the excellent format description provided by + * Ben Rudiak-Gould in comp.compression on August 13, 2001. Interestingly, the + * example Ben provided in the post is incorrect. The distance 110001 should + * instead be 111000. When corrected, the example byte stream becomes: + * + * 00 04 82 24 25 8f 80 7f + * + * which decompresses to "AIAIAIAIAIAIA" (without the quotes). 
+ */ + +/* + * Change history: + * + * 1.0 12 Feb 2003 - First version + * 1.1 16 Feb 2003 - Fixed distance check for > 4 GB uncompressed data + */ + +#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */ +#include "blast.h" /* prototype for blast() */ + +#define local static /* for local function definitions */ +#define MAXBITS 13 /* maximum code length */ +#define MAXWIN 4096 /* maximum window size */ + +/* input and output state */ +struct state { + /* input state */ + blast_in infun; /* input function provided by user */ + void *inhow; /* opaque information passed to infun() */ + unsigned char *in; /* next input location */ + unsigned left; /* available input at in */ + int bitbuf; /* bit buffer */ + int bitcnt; /* number of bits in bit buffer */ + + /* input limit error return state for bits() and decode() */ + jmp_buf env; + + /* output state */ + blast_out outfun; /* output function provided by user */ + void *outhow; /* opaque information passed to outfun() */ + unsigned next; /* index of next write location in out[] */ + int first; /* true to check distances (for first 4K) */ + unsigned char out[MAXWIN]; /* output buffer and sliding window */ +}; + +/* + * Return need bits from the input stream. This always leaves less than + * eight bits in the buffer. bits() works properly for need == 0. + * + * Format notes: + * + * - Bits are stored in bytes from the least significant bit to the most + * significant bit. Therefore bits are dropped from the bottom of the bit + * buffer, using shift right, and new bytes are appended to the top of the + * bit buffer, using shift left. 
+ */ +local int bits(struct state *s, int need) +{ + int val; /* bit accumulator */ + + /* load at least need bits into val */ + val = s->bitbuf; + while (s->bitcnt < need) { + if (s->left == 0) { + s->left = s->infun(s->inhow, &(s->in)); + if (s->left == 0) longjmp(s->env, 1); /* out of input */ + } + val |= (int)(*(s->in)++) << s->bitcnt; /* load eight bits */ + s->left--; + s->bitcnt += 8; + } + + /* drop need bits and update buffer, always zero to seven bits left */ + s->bitbuf = val >> need; + s->bitcnt -= need; + + /* return need bits, zeroing the bits above that */ + return val & ((1 << need) - 1); +} + +/* + * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of + * each length, which for a canonical code are stepped through in order. + * symbol[] are the symbol values in canonical order, where the number of + * entries is the sum of the counts in count[]. The decoding process can be + * seen in the function decode() below. + */ +struct huffman { + short *count; /* number of symbols of each length */ + short *symbol; /* canonically ordered symbols */ +}; + +/* + * Decode a code from the stream s using huffman table h. Return the symbol or + * a negative value if there is an error. If all of the lengths are zero, i.e. + * an empty code, or if the code is incomplete and an invalid code is received, + * then -9 is returned after reading MAXBITS bits. + * + * Format notes: + * + * - The codes as stored in the compressed data are bit-reversed relative to + * a simple integer ordering of codes of the same lengths. Hence below the + * bits are pulled from the compressed data one at a time and used to + * build the code value reversed from what is in the stream in order to + * permit simple integer comparisons for decoding. + * + * - The first code for the shortest length is all ones. Subsequent codes of + * the same length are simply integer decrements of the previous code. When + * moving up a length, a one bit is appended to the code. 
For a complete + * code, the last code of the longest length will be all zeros. To support + * this ordering, the bits pulled during decoding are inverted to apply the + * more "natural" ordering starting with all zeros and incrementing. + */ +local int decode(struct state *s, struct huffman *h) +{ + int len; /* current number of bits in code */ + int code; /* len bits being decoded */ + int first; /* first code of length len */ + int count; /* number of codes of length len */ + int index; /* index of first code of length len in symbol table */ + int bitbuf; /* bits from stream */ + int left; /* bits left in next or left to process */ + short *next; /* next number of codes */ + + bitbuf = s->bitbuf; + left = s->bitcnt; + code = first = index = 0; + len = 1; + next = h->count + 1; + while (1) { + while (left--) { + code |= (bitbuf & 1) ^ 1; /* invert code */ + bitbuf >>= 1; + count = *next++; + if (code < first + count) { /* if length len, return symbol */ + s->bitbuf = bitbuf; + s->bitcnt = (s->bitcnt - len) & 7; + return h->symbol[index + (code - first)]; + } + index += count; /* else update for next length */ + first += count; + first <<= 1; + code <<= 1; + len++; + } + left = (MAXBITS+1) - len; + if (left == 0) break; + if (s->left == 0) { + s->left = s->infun(s->inhow, &(s->in)); + if (s->left == 0) longjmp(s->env, 1); /* out of input */ + } + bitbuf = *(s->in)++; + s->left--; + if (left > 8) left = 8; + } + return -9; /* ran out of codes */ +} + +/* + * Given a list of repeated code lengths rep[0..n-1], where each byte is a + * count (high four bits + 1) and a code length (low four bits), generate the + * list of code lengths. This compaction reduces the size of the object code. + * Then given the list of code lengths length[0..n-1] representing a canonical + * Huffman code for n symbols, construct the tables required to decode those + * codes. 
Those tables are the number of codes of each length, and the symbols + * sorted by length, retaining their original order within each length. The + * return value is zero for a complete code set, negative for an over- + * subscribed code set, and positive for an incomplete code set. The tables + * can be used if the return value is zero or positive, but they cannot be used + * if the return value is negative. If the return value is zero, it is not + * possible for decode() using that table to return an error--any stream of + * enough bits will resolve to a symbol. If the return value is positive, then + * it is possible for decode() using that table to return an error for received + * codes past the end of the incomplete lengths. + */ +local int construct(struct huffman *h, const unsigned char *rep, int n) +{ + int symbol; /* current symbol when stepping through length[] */ + int len; /* current length when stepping through h->count[] */ + int left; /* number of possible codes left of current length */ + short offs[MAXBITS+1]; /* offsets in symbol table for each length */ + short length[256]; /* code lengths */ + + /* convert compact repeat counts into symbol bit length list */ + symbol = 0; + do { + len = *rep++; + left = (len >> 4) + 1; + len &= 15; + do { + length[symbol++] = len; + } while (--left); + } while (--n); + n = symbol; + + /* count number of codes of each length */ + for (len = 0; len <= MAXBITS; len++) + h->count[len] = 0; + for (symbol = 0; symbol < n; symbol++) + (h->count[length[symbol]])++; /* assumes lengths are within bounds */ + if (h->count[0] == n) /* no codes! 
*/ + return 0; /* complete, but decode() will fail */ + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; /* one possible code of zero length */ + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; /* one more bit, double codes left */ + left -= h->count[len]; /* deduct count from possible codes */ + if (left < 0) return left; /* over-subscribed--return negative */ + } /* left > 0 means incomplete */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + h->count[len]; + + /* + * put symbols in table sorted by length, by symbol order within each + * length + */ + for (symbol = 0; symbol < n; symbol++) + if (length[symbol] != 0) + h->symbol[offs[length[symbol]]++] = symbol; + + /* return zero for complete set, positive for incomplete set */ + return left; +} + +/* + * Decode PKWare Compression Library stream. + * + * Format notes: + * + * - First byte is 0 if literals are uncoded or 1 if they are coded. Second + * byte is 4, 5, or 6 for the number of extra bits in the distance code. + * This is the base-2 logarithm of the dictionary size minus six. + * + * - Compressed data is a combination of literals and length/distance pairs + * terminated by an end code. Literals are either Huffman coded or + * uncoded bytes. A length/distance pair is a coded length followed by a + * coded distance to represent a string that occurs earlier in the + * uncompressed data that occurs again at the current location. + * + * - A bit preceding a literal or length/distance pair indicates which comes + * next, 0 for literals, 1 for length/distance. + * + * - If literals are uncoded, then the next eight bits are the literal, in the + * normal bit order in th stream, i.e. no bit-reversal is needed. Similarly, + * no bit reversal is needed for either the length extra bits or the distance + * extra bits. + * + * - Literal bytes are simply written to the output. 
A length/distance pair is + * an instruction to copy previously uncompressed bytes to the output. The + * copy is from distance bytes back in the output stream, copying for length + * bytes. + * + * - Distances pointing before the beginning of the output data are not + * permitted. + * + * - Overlapped copies, where the length is greater than the distance, are + * allowed and common. For example, a distance of one and a length of 518 + * simply copies the last byte 518 times. A distance of four and a length of + * twelve copies the last four bytes three times. A simple forward copy + * ignoring whether the length is greater than the distance or not implements + * this correctly. + */ +local int decomp(struct state *s) +{ + int lit; /* true if literals are coded */ + int dict; /* log2(dictionary size) - 6 */ + int symbol; /* decoded symbol, extra bits for distance */ + int len; /* length for copy */ + int dist; /* distance for copy */ + int copy; /* copy counter */ + unsigned char *from, *to; /* copy pointers */ + static int virgin = 1; /* build tables once */ + static short litcnt[MAXBITS+1], litsym[256]; /* litcode memory */ + static short lencnt[MAXBITS+1], lensym[16]; /* lencode memory */ + static short distcnt[MAXBITS+1], distsym[64]; /* distcode memory */ + static struct huffman litcode = {litcnt, litsym}; /* length code */ + static struct huffman lencode = {lencnt, lensym}; /* length code */ + static struct huffman distcode = {distcnt, distsym};/* distance code */ + /* bit lengths of literal codes */ + static const unsigned char litlen[] = { + 11, 124, 8, 7, 28, 7, 188, 13, 76, 4, 10, 8, 12, 10, 12, 10, 8, 23, 8, + 9, 7, 6, 7, 8, 7, 6, 55, 8, 23, 24, 12, 11, 7, 9, 11, 12, 6, 7, 22, 5, + 7, 24, 6, 11, 9, 6, 7, 22, 7, 11, 38, 7, 9, 8, 25, 11, 8, 11, 9, 12, + 8, 12, 5, 38, 5, 38, 5, 11, 7, 5, 6, 21, 6, 10, 53, 8, 7, 24, 10, 27, + 44, 253, 253, 253, 252, 252, 252, 13, 12, 45, 12, 45, 12, 61, 12, 45, + 44, 173}; + /* bit lengths of length codes 0..15 */ + static 
const unsigned char lenlen[] = {2, 35, 36, 53, 38, 23}; + /* bit lengths of distance codes 0..63 */ + static const unsigned char distlen[] = {2, 20, 53, 230, 247, 151, 248}; + static const short base[16] = { /* base for length codes */ + 3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 16, 24, 40, 72, 136, 264}; + static const char extra[16] = { /* extra bits for length codes */ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8}; + + /* set up decoding tables (once--might not be thread-safe) */ + if (virgin) { + construct(&litcode, litlen, sizeof(litlen)); + construct(&lencode, lenlen, sizeof(lenlen)); + construct(&distcode, distlen, sizeof(distlen)); + virgin = 0; + } + + /* read header */ + lit = bits(s, 8); + if (lit > 1) return -1; + dict = bits(s, 8); + if (dict < 4 || dict > 6) return -2; + + /* decode literals and length/distance pairs */ + do { + if (bits(s, 1)) { + /* get length */ + symbol = decode(s, &lencode); + len = base[symbol] + bits(s, extra[symbol]); + if (len == 519) break; /* end code */ + + /* get distance */ + symbol = len == 2 ? 2 : dict; + dist = decode(s, &distcode) << symbol; + dist += bits(s, symbol); + dist++; + if (s->first && dist > s->next) + return -3; /* distance too far back */ + + /* copy length bytes from distance bytes back */ + do { + to = s->out + s->next; + from = to - dist; + copy = MAXWIN; + if (s->next < dist) { + from += copy; + copy = dist; + } + copy -= s->next; + if (copy > len) copy = len; + len -= copy; + s->next += copy; + do { + *to++ = *from++; + } while (--copy); + if (s->next == MAXWIN) { + if (s->outfun(s->outhow, s->out, s->next)) return 1; + s->next = 0; + s->first = 0; + } + } while (len != 0); + } + else { + /* get literal and write it */ + symbol = lit ? 
decode(s, &litcode) : bits(s, 8); + s->out[s->next++] = symbol; + if (s->next == MAXWIN) { + if (s->outfun(s->outhow, s->out, s->next)) return 1; + s->next = 0; + s->first = 0; + } + } + } while (1); + return 0; +} + +/* See comments in blast.h */ +int blast(blast_in infun, void *inhow, blast_out outfun, void *outhow) +{ + struct state s; /* input/output state */ + int err; /* return value */ + + /* initialize input state */ + s.infun = infun; + s.inhow = inhow; + s.left = 0; + s.bitbuf = 0; + s.bitcnt = 0; + + /* initialize output state */ + s.outfun = outfun; + s.outhow = outhow; + s.next = 0; + s.first = 1; + + /* return if bits() or decode() tries to read past available input */ + if (setjmp(s.env) != 0) /* if came back here via longjmp(), */ + err = 2; /* then skip decomp(), return error */ + else + err = decomp(&s); /* decompress */ + + /* write any leftover output and update the error code if needed */ + if (err != 1 && s.next && s.outfun(s.outhow, s.out, s.next) && err == 0) + err = 1; + return err; +} + +#ifdef TEST +/* Example of how to use blast() */ +#include <stdio.h> +#include <stdlib.h> + +#define CHUNK 16384 + +local unsigned inf(void *how, unsigned char **buf) +{ + static unsigned char hold[CHUNK]; + + *buf = hold; + return fread(hold, 1, CHUNK, (FILE *)how); +} + +local int outf(void *how, unsigned char *buf, unsigned len) +{ + return fwrite(buf, 1, len, (FILE *)how) != len; +} + +/* Decompress a PKWare Compression Library stream from stdin to stdout */ +int main(void) +{ + int ret, n; + + /* decompress to stdout */ + ret = blast(inf, stdin, outf, stdout); + if (ret != 0) fprintf(stderr, "blast error: %d\n", ret); + + /* see if there are any leftover bytes */ + n = 0; + while (getchar() != EOF) n++; + if (n) fprintf(stderr, "blast warning: %d unused bytes of input\n", n); + + /* return blast() error code */ + return ret; +} +#endif Added: external/zlib/contrib/blast/blast.h ============================================================================== --- (empty file)
+++ external/zlib/contrib/blast/blast.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,71 @@ +/* blast.h -- interface for blast.c + Copyright (C) 2003 Mark Adler + version 1.1, 16 Feb 2003 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler at alumni.caltech.edu + */ + + +/* + * blast() decompresses the PKWare Data Compression Library (DCL) compressed + * format. It provides the same functionality as the explode() function in + * that library. (Note: PKWare overused the "implode" verb, and the format + * used by their library implode() function is completely different and + * incompatible with the implode compression method supported by PKZIP.) + */ + + +typedef unsigned (*blast_in)(void *how, unsigned char **buf); +typedef int (*blast_out)(void *how, unsigned char *buf, unsigned len); +/* Definitions for input/output functions passed to blast(). See below for + * what the provided functions need to do. + */ + + +int blast(blast_in infun, void *inhow, blast_out outfun, void *outhow); +/* Decompress input to output using the provided infun() and outfun() calls. + * On success, the return value of blast() is zero. If there is an error in + * the source data, i.e. 
it is not in the proper format, then a negative value + * is returned. If there is not enough input available or there is not enough + * output space, then a positive error is returned. + * + * The input function is invoked: len = infun(how, &buf), where buf is set by + * infun() to point to the input buffer, and infun() returns the number of + * available bytes there. If infun() returns zero, then blast() returns with + * an input error. (blast() only asks for input if it needs it.) inhow is for + * use by the application to pass an input descriptor to infun(), if desired. + * + * The output function is invoked: err = outfun(how, buf, len), where the bytes + * to be written are buf[0..len-1]. If err is not zero, then blast() returns + * with an output error. outfun() is always called with len <= 4096. outhow + * is for use by the application to pass an output descriptor to outfun(), if + * desired. + * + * The return codes are: + * + * 2: ran out of input before completing decompression + * 1: output error before completing decompression + * 0: successful decompression + * -1: literal flag not zero or one + * -2: dictionary size not in 4..6 + * -3: distance is too far back + * + * At the bottom of blast.c is an example program that uses blast() that can be + * compiled to produce a command-line decompression filter by defining TEST. + */ Added: external/zlib/contrib/blast/test.pk ============================================================================== Binary file. No diff available. 
Added: external/zlib/contrib/blast/test.txt ============================================================================== --- (empty file) +++ external/zlib/contrib/blast/test.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1 @@ +AIAIAIAIAIAIA \ No newline at end of file Added: external/zlib/contrib/delphi/ZLib.pas ============================================================================== --- (empty file) +++ external/zlib/contrib/delphi/ZLib.pas Tue Jan 3 07:42:59 2006 @@ -0,0 +1,557 @@ +{*******************************************************} +{ } +{ Borland Delphi Supplemental Components } +{ ZLIB Data Compression Interface Unit } +{ } +{ Copyright (c) 1997,99 Borland Corporation } +{ } +{*******************************************************} + +{ Updated for zlib 1.2.x by Cosmin Truta } + +unit ZLib; + +interface + +uses SysUtils, Classes; + +type + TAlloc = function (AppData: Pointer; Items, Size: Integer): Pointer; cdecl; + TFree = procedure (AppData, Block: Pointer); cdecl; + + // Internal structure. Ignore. 
+ TZStreamRec = packed record + next_in: PChar; // next input byte + avail_in: Integer; // number of bytes available at next_in + total_in: Longint; // total nb of input bytes read so far + + next_out: PChar; // next output byte should be put here + avail_out: Integer; // remaining free space at next_out + total_out: Longint; // total nb of bytes output so far + + msg: PChar; // last error message, NULL if no error + internal: Pointer; // not visible by applications + + zalloc: TAlloc; // used to allocate the internal state + zfree: TFree; // used to free the internal state + AppData: Pointer; // private data object passed to zalloc and zfree + + data_type: Integer; // best guess about the data type: ascii or binary + adler: Longint; // adler32 value of the uncompressed data + reserved: Longint; // reserved for future use + end; + + // Abstract ancestor class + TCustomZlibStream = class(TStream) + private + FStrm: TStream; + FStrmPos: Integer; + FOnProgress: TNotifyEvent; + FZRec: TZStreamRec; + FBuffer: array [Word] of Char; + protected + procedure Progress(Sender: TObject); dynamic; + property OnProgress: TNotifyEvent read FOnProgress write FOnProgress; + constructor Create(Strm: TStream); + end; + +{ TCompressionStream compresses data on the fly as data is written to it, and + stores the compressed data to another stream. + + TCompressionStream is write-only and strictly sequential. Reading from the + stream will raise an exception. Using Seek to move the stream pointer + will raise an exception. + + Output data is cached internally, written to the output stream only when + the internal output buffer is full. All pending output data is flushed + when the stream is destroyed. + + The Position property returns the number of uncompressed bytes of + data that have been written to the stream so far. 
+ + CompressionRate returns the on-the-fly percentage by which the original + data has been compressed: (1 - (CompressedBytes / UncompressedBytes)) * 100 + If raw data size = 100 and compressed data size = 25, the CompressionRate + is 75% + + The OnProgress event is called each time the output buffer is filled and + written to the output stream. This is useful for updating a progress + indicator when you are writing a large chunk of data to the compression + stream in a single call.} + + + TCompressionLevel = (clNone, clFastest, clDefault, clMax); + + TCompressionStream = class(TCustomZlibStream) + private + function GetCompressionRate: Single; + public + constructor Create(CompressionLevel: TCompressionLevel; Dest: TStream); + destructor Destroy; override; + function Read(var Buffer; Count: Longint): Longint; override; + function Write(const Buffer; Count: Longint): Longint; override; + function Seek(Offset: Longint; Origin: Word): Longint; override; + property CompressionRate: Single read GetCompressionRate; + property OnProgress; + end; + +{ TDecompressionStream decompresses data on the fly as data is read from it. + + Compressed data comes from a separate source stream. TDecompressionStream + is read-only and unidirectional; you can seek forward in the stream, but not + backwards. The special case of setting the stream position to zero is + allowed. Seeking forward decompresses data until the requested position in + the uncompressed data has been reached. Seeking backwards, seeking relative + to the end of the stream, requesting the size of the stream, and writing to + the stream will raise an exception. + + The Position property returns the number of bytes of uncompressed data that + have been read from the stream so far. + + The OnProgress event is called each time the internal input buffer of + compressed data is exhausted and the next block is read from the input stream. 
+ This is useful for updating a progress indicator when you are reading a + large chunk of data from the decompression stream in a single call.} + + TDecompressionStream = class(TCustomZlibStream) + public + constructor Create(Source: TStream); + destructor Destroy; override; + function Read(var Buffer; Count: Longint): Longint; override; + function Write(const Buffer; Count: Longint): Longint; override; + function Seek(Offset: Longint; Origin: Word): Longint; override; + property OnProgress; + end; + + + +{ CompressBuf compresses data, buffer to buffer, in one call. + In: InBuf = ptr to compressed data + InBytes = number of bytes in InBuf + Out: OutBuf = ptr to newly allocated buffer containing decompressed data + OutBytes = number of bytes in OutBuf } +procedure CompressBuf(const InBuf: Pointer; InBytes: Integer; + out OutBuf: Pointer; out OutBytes: Integer); + + +{ DecompressBuf decompresses data, buffer to buffer, in one call. + In: InBuf = ptr to compressed data + InBytes = number of bytes in InBuf + OutEstimate = zero, or est. size of the decompressed data + Out: OutBuf = ptr to newly allocated buffer containing decompressed data + OutBytes = number of bytes in OutBuf } +procedure DecompressBuf(const InBuf: Pointer; InBytes: Integer; + OutEstimate: Integer; out OutBuf: Pointer; out OutBytes: Integer); + +{ DecompressToUserBuf decompresses data, buffer to buffer, in one call. 
+ In: InBuf = ptr to compressed data + InBytes = number of bytes in InBuf + Out: OutBuf = ptr to user-allocated buffer to contain decompressed data + BufSize = number of bytes in OutBuf } +procedure DecompressToUserBuf(const InBuf: Pointer; InBytes: Integer; + const OutBuf: Pointer; BufSize: Integer); + +const + zlib_version = '1.2.3'; + +type + EZlibError = class(Exception); + ECompressionError = class(EZlibError); + EDecompressionError = class(EZlibError); + +implementation + +uses ZLibConst; + +const + Z_NO_FLUSH = 0; + Z_PARTIAL_FLUSH = 1; + Z_SYNC_FLUSH = 2; + Z_FULL_FLUSH = 3; + Z_FINISH = 4; + + Z_OK = 0; + Z_STREAM_END = 1; + Z_NEED_DICT = 2; + Z_ERRNO = (-1); + Z_STREAM_ERROR = (-2); + Z_DATA_ERROR = (-3); + Z_MEM_ERROR = (-4); + Z_BUF_ERROR = (-5); + Z_VERSION_ERROR = (-6); + + Z_NO_COMPRESSION = 0; + Z_BEST_SPEED = 1; + Z_BEST_COMPRESSION = 9; + Z_DEFAULT_COMPRESSION = (-1); + + Z_FILTERED = 1; + Z_HUFFMAN_ONLY = 2; + Z_RLE = 3; + Z_DEFAULT_STRATEGY = 0; + + Z_BINARY = 0; + Z_ASCII = 1; + Z_UNKNOWN = 2; + + Z_DEFLATED = 8; + + +{$L adler32.obj} +{$L compress.obj} +{$L crc32.obj} +{$L deflate.obj} +{$L infback.obj} +{$L inffast.obj} +{$L inflate.obj} +{$L inftrees.obj} +{$L trees.obj} +{$L uncompr.obj} +{$L zutil.obj} + +procedure adler32; external; +procedure compressBound; external; +procedure crc32; external; +procedure deflateInit2_; external; +procedure deflateParams; external; + +function _malloc(Size: Integer): Pointer; cdecl; +begin + Result := AllocMem(Size); +end; + +procedure _free(Block: Pointer); cdecl; +begin + FreeMem(Block); +end; + +procedure _memset(P: Pointer; B: Byte; count: Integer); cdecl; +begin + FillChar(P^, count, B); +end; + +procedure _memcpy(dest, source: Pointer; count: Integer); cdecl; +begin + Move(source^, dest^, count); +end; + + + +// deflate compresses data +function deflateInit_(var strm: TZStreamRec; level: Integer; version: PChar; + recsize: Integer): Integer; external; +function deflate(var strm: TZStreamRec; flush: 
Integer): Integer; external; +function deflateEnd(var strm: TZStreamRec): Integer; external; + +// inflate decompresses data +function inflateInit_(var strm: TZStreamRec; version: PChar; + recsize: Integer): Integer; external; +function inflate(var strm: TZStreamRec; flush: Integer): Integer; external; +function inflateEnd(var strm: TZStreamRec): Integer; external; +function inflateReset(var strm: TZStreamRec): Integer; external; + + +function zlibAllocMem(AppData: Pointer; Items, Size: Integer): Pointer; cdecl; +begin +// GetMem(Result, Items*Size); + Result := AllocMem(Items * Size); +end; + +procedure zlibFreeMem(AppData, Block: Pointer); cdecl; +begin + FreeMem(Block); +end; + +{function zlibCheck(code: Integer): Integer; +begin + Result := code; + if code < 0 then + raise EZlibError.Create('error'); //!! +end;} + +function CCheck(code: Integer): Integer; +begin + Result := code; + if code < 0 then + raise ECompressionError.Create('error'); //!! +end; + +function DCheck(code: Integer): Integer; +begin + Result := code; + if code < 0 then + raise EDecompressionError.Create('error'); //!! 
+end; + +procedure CompressBuf(const InBuf: Pointer; InBytes: Integer; + out OutBuf: Pointer; out OutBytes: Integer); +var + strm: TZStreamRec; + P: Pointer; +begin + FillChar(strm, sizeof(strm), 0); + strm.zalloc := zlibAllocMem; + strm.zfree := zlibFreeMem; + OutBytes := ((InBytes + (InBytes div 10) + 12) + 255) and not 255; + GetMem(OutBuf, OutBytes); + try + strm.next_in := InBuf; + strm.avail_in := InBytes; + strm.next_out := OutBuf; + strm.avail_out := OutBytes; + CCheck(deflateInit_(strm, Z_BEST_COMPRESSION, zlib_version, sizeof(strm))); + try + while CCheck(deflate(strm, Z_FINISH)) <> Z_STREAM_END do + begin + P := OutBuf; + Inc(OutBytes, 256); + ReallocMem(OutBuf, OutBytes); + strm.next_out := PChar(Integer(OutBuf) + (Integer(strm.next_out) - Integer(P))); + strm.avail_out := 256; + end; + finally + CCheck(deflateEnd(strm)); + end; + ReallocMem(OutBuf, strm.total_out); + OutBytes := strm.total_out; + except + FreeMem(OutBuf); + raise + end; +end; + + +procedure DecompressBuf(const InBuf: Pointer; InBytes: Integer; + OutEstimate: Integer; out OutBuf: Pointer; out OutBytes: Integer); +var + strm: TZStreamRec; + P: Pointer; + BufInc: Integer; +begin + FillChar(strm, sizeof(strm), 0); + strm.zalloc := zlibAllocMem; + strm.zfree := zlibFreeMem; + BufInc := (InBytes + 255) and not 255; + if OutEstimate = 0 then + OutBytes := BufInc + else + OutBytes := OutEstimate; + GetMem(OutBuf, OutBytes); + try + strm.next_in := InBuf; + strm.avail_in := InBytes; + strm.next_out := OutBuf; + strm.avail_out := OutBytes; + DCheck(inflateInit_(strm, zlib_version, sizeof(strm))); + try + while DCheck(inflate(strm, Z_NO_FLUSH)) <> Z_STREAM_END do + begin + P := OutBuf; + Inc(OutBytes, BufInc); + ReallocMem(OutBuf, OutBytes); + strm.next_out := PChar(Integer(OutBuf) + (Integer(strm.next_out) - Integer(P))); + strm.avail_out := BufInc; + end; + finally + DCheck(inflateEnd(strm)); + end; + ReallocMem(OutBuf, strm.total_out); + OutBytes := strm.total_out; + except + FreeMem(OutBuf); 
+ raise + end; +end; + +procedure DecompressToUserBuf(const InBuf: Pointer; InBytes: Integer; + const OutBuf: Pointer; BufSize: Integer); +var + strm: TZStreamRec; +begin + FillChar(strm, sizeof(strm), 0); + strm.zalloc := zlibAllocMem; + strm.zfree := zlibFreeMem; + strm.next_in := InBuf; + strm.avail_in := InBytes; + strm.next_out := OutBuf; + strm.avail_out := BufSize; + DCheck(inflateInit_(strm, zlib_version, sizeof(strm))); + try + if DCheck(inflate(strm, Z_FINISH)) <> Z_STREAM_END then + raise EZlibError.CreateRes(@sTargetBufferTooSmall); + finally + DCheck(inflateEnd(strm)); + end; +end; + +// TCustomZlibStream + +constructor TCustomZLibStream.Create(Strm: TStream); +begin + inherited Create; + FStrm := Strm; + FStrmPos := Strm.Position; + FZRec.zalloc := zlibAllocMem; + FZRec.zfree := zlibFreeMem; +end; + +procedure TCustomZLibStream.Progress(Sender: TObject); +begin + if Assigned(FOnProgress) then FOnProgress(Sender); +end; + + +// TCompressionStream + +constructor TCompressionStream.Create(CompressionLevel: TCompressionLevel; + Dest: TStream); +const + Levels: array [TCompressionLevel] of ShortInt = + (Z_NO_COMPRESSION, Z_BEST_SPEED, Z_DEFAULT_COMPRESSION, Z_BEST_COMPRESSION); +begin + inherited Create(Dest); + FZRec.next_out := FBuffer; + FZRec.avail_out := sizeof(FBuffer); + CCheck(deflateInit_(FZRec, Levels[CompressionLevel], zlib_version, sizeof(FZRec))); +end; + +destructor TCompressionStream.Destroy; +begin + FZRec.next_in := nil; + FZRec.avail_in := 0; + try + if FStrm.Position <> FStrmPos then FStrm.Position := FStrmPos; + while (CCheck(deflate(FZRec, Z_FINISH)) <> Z_STREAM_END) + and (FZRec.avail_out = 0) do + begin + FStrm.WriteBuffer(FBuffer, sizeof(FBuffer)); + FZRec.next_out := FBuffer; + FZRec.avail_out := sizeof(FBuffer); + end; + if FZRec.avail_out < sizeof(FBuffer) then + FStrm.WriteBuffer(FBuffer, sizeof(FBuffer) - FZRec.avail_out); + finally + deflateEnd(FZRec); + end; + inherited Destroy; +end; + +function TCompressionStream.Read(var 
Buffer; Count: Longint): Longint; +begin + raise ECompressionError.CreateRes(@sInvalidStreamOp); +end; + +function TCompressionStream.Write(const Buffer; Count: Longint): Longint; +begin + FZRec.next_in := @Buffer; + FZRec.avail_in := Count; + if FStrm.Position <> FStrmPos then FStrm.Position := FStrmPos; + while (FZRec.avail_in > 0) do + begin + CCheck(deflate(FZRec, 0)); + if FZRec.avail_out = 0 then + begin + FStrm.WriteBuffer(FBuffer, sizeof(FBuffer)); + FZRec.next_out := FBuffer; + FZRec.avail_out := sizeof(FBuffer); + FStrmPos := FStrm.Position; + Progress(Self); + end; + end; + Result := Count; +end; + +function TCompressionStream.Seek(Offset: Longint; Origin: Word): Longint; +begin + if (Offset = 0) and (Origin = soFromCurrent) then + Result := FZRec.total_in + else + raise ECompressionError.CreateRes(@sInvalidStreamOp); +end; + +function TCompressionStream.GetCompressionRate: Single; +begin + if FZRec.total_in = 0 then + Result := 0 + else + Result := (1.0 - (FZRec.total_out / FZRec.total_in)) * 100.0; +end; + + +// TDecompressionStream + +constructor TDecompressionStream.Create(Source: TStream); +begin + inherited Create(Source); + FZRec.next_in := FBuffer; + FZRec.avail_in := 0; + DCheck(inflateInit_(FZRec, zlib_version, sizeof(FZRec))); +end; + +destructor TDecompressionStream.Destroy; +begin + FStrm.Seek(-FZRec.avail_in, 1); + inflateEnd(FZRec); + inherited Destroy; +end; + +function TDecompressionStream.Read(var Buffer; Count: Longint): Longint; +begin + FZRec.next_out := @Buffer; + FZRec.avail_out := Count; + if FStrm.Position <> FStrmPos then FStrm.Position := FStrmPos; + while (FZRec.avail_out > 0) do + begin + if FZRec.avail_in = 0 then + begin + FZRec.avail_in := FStrm.Read(FBuffer, sizeof(FBuffer)); + if FZRec.avail_in = 0 then + begin + Result := Count - FZRec.avail_out; + Exit; + end; + FZRec.next_in := FBuffer; + FStrmPos := FStrm.Position; + Progress(Self); + end; + CCheck(inflate(FZRec, 0)); + end; + Result := Count; +end; + +function 
TDecompressionStream.Write(const Buffer; Count: Longint): Longint; +begin + raise EDecompressionError.CreateRes(@sInvalidStreamOp); +end; + +function TDecompressionStream.Seek(Offset: Longint; Origin: Word): Longint; +var + I: Integer; + Buf: array [0..4095] of Char; +begin + if (Offset = 0) and (Origin = soFromBeginning) then + begin + DCheck(inflateReset(FZRec)); + FZRec.next_in := FBuffer; + FZRec.avail_in := 0; + FStrm.Position := 0; + FStrmPos := 0; + end + else if ( (Offset >= 0) and (Origin = soFromCurrent)) or + ( ((Offset - FZRec.total_out) > 0) and (Origin = soFromBeginning)) then + begin + if Origin = soFromBeginning then Dec(Offset, FZRec.total_out); + if Offset > 0 then + begin + for I := 1 to Offset div sizeof(Buf) do + ReadBuffer(Buf, sizeof(Buf)); + ReadBuffer(Buf, Offset mod sizeof(Buf)); + end; + end + else + raise EDecompressionError.CreateRes(@sInvalidStreamOp); + Result := FZRec.total_out; +end; + + +end. Added: external/zlib/contrib/delphi/ZLibConst.pas ============================================================================== --- (empty file) +++ external/zlib/contrib/delphi/ZLibConst.pas Tue Jan 3 07:42:59 2006 @@ -0,0 +1,11 @@ +unit ZLibConst; + +interface + +resourcestring + sTargetBufferTooSmall = 'ZLib error: target buffer may be too small'; + sInvalidStreamOp = 'Invalid stream operation'; + +implementation + +end. Added: external/zlib/contrib/delphi/readme.txt ============================================================================== --- (empty file) +++ external/zlib/contrib/delphi/readme.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,76 @@ + +Overview +======== + +This directory contains an update to the ZLib interface unit, +distributed by Borland as a Delphi supplemental component. + +The original ZLib unit is Copyright (c) 1997,99 Borland Corp., +and is based on zlib version 1.0.4. There are a series of bugs +and security problems associated with that old zlib version, and +we recommend the users to update their ZLib unit. 
+ + +Summary of modifications +======================== + +- Improved makefile, adapted to zlib version 1.2.1. + +- Some field types from TZStreamRec are changed from Integer to + Longint, for consistency with the zlib.h header, and for 64-bit + readiness. + +- The zlib_version constant is updated. + +- The new Z_RLE strategy has its corresponding symbolic constant. + +- The allocation and deallocation functions and function types + (TAlloc, TFree, zlibAllocMem and zlibFreeMem) are now cdecl, + and _malloc and _free are added as C RTL stubs. As a result, + the original C sources of zlib can be compiled out of the box, + and linked to the ZLib unit. + + +Suggestions for improvements +============================ + +Currently, the ZLib unit provides only a limited wrapper around +the zlib library, and much of the original zlib functionality is +missing. Handling compressed file formats like ZIP/GZIP or PNG +cannot be implemented without having this functionality. +Applications that handle these formats are either using their own, +duplicated code, or not using the ZLib unit at all. + +Here are a few suggestions: + +- Checksum class wrappers around adler32() and crc32(), similar + to the Java classes that implement the java.util.zip.Checksum + interface. + +- The ability to read and write raw deflate streams, without the + zlib stream header and trailer. Raw deflate streams are used + in the ZIP file format. + +- The ability to read and write gzip streams, used in the GZIP + file format, and normally produced by the gzip program. + +- The ability to select a different compression strategy, useful + to PNG and MNG image compression, and to multimedia compression + in general. 
Besides the compression level + + TCompressionLevel = (clNone, clFastest, clDefault, clMax); + + which, in fact, could have used the 'z' prefix and avoided + TColor-like symbols + + TCompressionLevel = (zcNone, zcFastest, zcDefault, zcMax); + + there could be a compression strategy + + TCompressionStrategy = (zsDefault, zsFiltered, zsHuffmanOnly, zsRle); + +- ZIP and GZIP stream handling via TStreams. + + +-- +Cosmin Truta Added: external/zlib/contrib/delphi/zlibd32.mak ============================================================================== --- (empty file) +++ external/zlib/contrib/delphi/zlibd32.mak Tue Jan 3 07:42:59 2006 @@ -0,0 +1,93 @@ +# Makefile for zlib +# For use with Delphi and C++ Builder under Win32 +# Updated for zlib 1.2.x by Cosmin Truta + +# ------------ Borland C++ ------------ + +# This project uses the Delphi (fastcall/register) calling convention: +LOC = -DZEXPORT=__fastcall -DZEXPORTVA=__cdecl + +CC = bcc32 +LD = bcc32 +AR = tlib +# do not use "-pr" in CFLAGS +CFLAGS = -a -d -k- -O2 $(LOC) +LDFLAGS = + + +# variables +ZLIB_LIB = zlib.lib + +OBJ1 = adler32.obj compress.obj crc32.obj deflate.obj gzio.obj infback.obj +OBJ2 = inffast.obj inflate.obj inftrees.obj trees.obj uncompr.obj zutil.obj +OBJP1 = +adler32.obj+compress.obj+crc32.obj+deflate.obj+gzio.obj+infback.obj +OBJP2 = +inffast.obj+inflate.obj+inftrees.obj+trees.obj+uncompr.obj+zutil.obj + + +# targets +all: $(ZLIB_LIB) example.exe minigzip.exe + +.c.obj: + $(CC) -c $(CFLAGS) $*.c + +adler32.obj: adler32.c zlib.h zconf.h + +compress.obj: compress.c zlib.h zconf.h + +crc32.obj: crc32.c zlib.h zconf.h crc32.h + +deflate.obj: deflate.c deflate.h zutil.h zlib.h zconf.h + +gzio.obj: gzio.c zutil.h zlib.h zconf.h + +infback.obj: infback.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inffast.obj: inffast.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h + +inflate.obj: inflate.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + 
+inftrees.obj: inftrees.c zutil.h zlib.h zconf.h inftrees.h + +trees.obj: trees.c zutil.h zlib.h zconf.h deflate.h trees.h + +uncompr.obj: uncompr.c zlib.h zconf.h + +zutil.obj: zutil.c zutil.h zlib.h zconf.h + +example.obj: example.c zlib.h zconf.h + +minigzip.obj: minigzip.c zlib.h zconf.h + + +# For the sake of the old Borland make, +# the command line is cut to fit in the MS-DOS 128 byte limit: +$(ZLIB_LIB): $(OBJ1) $(OBJ2) + -del $(ZLIB_LIB) + $(AR) $(ZLIB_LIB) $(OBJP1) + $(AR) $(ZLIB_LIB) $(OBJP2) + + +# testing +test: example.exe minigzip.exe + example + echo hello world | minigzip | minigzip -d + +example.exe: example.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) example.obj $(ZLIB_LIB) + +minigzip.exe: minigzip.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) minigzip.obj $(ZLIB_LIB) + + +# cleanup +clean: + -del *.obj + -del *.exe + -del *.lib + -del *.tds + -del zlib.bak + -del foo.gz + Added: external/zlib/contrib/dotzlib/DotZLib.build ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib.build Tue Jan 3 07:42:59 2006 @@ -0,0 +1,33 @@ + + + A .Net wrapper library around ZLib1.dll + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file Added: external/zlib/contrib/dotzlib/DotZLib.chm ============================================================================== Binary file. No diff available. 
Added: external/zlib/contrib/dotzlib/DotZLib.sln ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib.sln Tue Jan 3 07:42:59 2006 @@ -0,0 +1,21 @@ +Microsoft Visual Studio Solution File, Format Version 8.00 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DotZLib", "DotZLib\DotZLib.csproj", "{BB1EE0B1-1808-46CB-B786-949D91117FC5}" + ProjectSection(ProjectDependencies) = postProject + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfiguration) = preSolution + Debug = Debug + Release = Release + EndGlobalSection + GlobalSection(ProjectConfiguration) = postSolution + {BB1EE0B1-1808-46CB-B786-949D91117FC5}.Debug.ActiveCfg = Debug|.NET + {BB1EE0B1-1808-46CB-B786-949D91117FC5}.Debug.Build.0 = Debug|.NET + {BB1EE0B1-1808-46CB-B786-949D91117FC5}.Release.ActiveCfg = Release|.NET + {BB1EE0B1-1808-46CB-B786-949D91117FC5}.Release.Build.0 = Release|.NET + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + EndGlobalSection + GlobalSection(ExtensibilityAddIns) = postSolution + EndGlobalSection +EndGlobal Added: external/zlib/contrib/dotzlib/DotZLib/AssemblyInfo.cs ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib/AssemblyInfo.cs Tue Jan 3 07:42:59 2006 @@ -0,0 +1,58 @@ +using System.Reflection; +using System.Runtime.CompilerServices; + +// +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. 
+// +[assembly: AssemblyTitle("DotZLib")] +[assembly: AssemblyDescription(".Net bindings for ZLib compression dll 1.2.x")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("Henrik Ravn")] +[assembly: AssemblyProduct("")] +[assembly: AssemblyCopyright("(c) 2004 by Henrik Ravn")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Revision and Build Numbers +// by using the '*' as shown below: + +[assembly: AssemblyVersion("1.0.*")] + +// +// In order to sign your assembly you must specify a key to use. Refer to the +// Microsoft .NET Framework documentation for more information on assembly signing. +// +// Use the attributes below to control which key is used for signing. +// +// Notes: +// (*) If no key is specified, the assembly is not signed. +// (*) KeyName refers to a key that has been installed in the Crypto Service +// Provider (CSP) on your machine. KeyFile refers to a file which contains +// a key. +// (*) If the KeyFile and the KeyName values are both specified, the +// following processing occurs: +// (1) If the KeyName can be found in the CSP, that key is used. +// (2) If the KeyName does not exist and the KeyFile does exist, the key +// in the KeyFile is installed into the CSP and used. +// (*) In order to create a KeyFile, you can use the sn.exe (Strong Name) utility. +// When specifying the KeyFile, the location of the KeyFile should be +// relative to the project output directory which is +// %Project Directory%\obj\. 
For example, if your KeyFile is +// located in the project directory, you would specify the AssemblyKeyFile +// attribute as [assembly: AssemblyKeyFile("..\\..\\mykey.snk")] +// (*) Delay Signing is an advanced option - see the Microsoft .NET Framework +// documentation for more information on this. +// +[assembly: AssemblyDelaySign(false)] +[assembly: AssemblyKeyFile("")] +[assembly: AssemblyKeyName("")] Added: external/zlib/contrib/dotzlib/DotZLib/ChecksumImpl.cs ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib/ChecksumImpl.cs Tue Jan 3 07:42:59 2006 @@ -0,0 +1,202 @@ +// +// © Copyright Henrik Ravn 2004 +// +// Use, modification and distribution are subject to the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +using System; +using System.Runtime.InteropServices; +using System.Text; + + +namespace DotZLib +{ + #region ChecksumGeneratorBase + /// + /// Implements the common functionality needed for all ChecksumGenerators + /// + /// + public abstract class ChecksumGeneratorBase : ChecksumGenerator + { + /// + /// The value of the current checksum + /// + protected uint _current; + + /// + /// Initializes a new instance of the checksum generator base - the current checksum is + /// set to zero + /// + public ChecksumGeneratorBase() + { + _current = 0; + } + + /// + /// Initializes a new instance of the checksum generator base with a specified value + /// + /// The value to set the current checksum to + public ChecksumGeneratorBase(uint initialValue) + { + _current = initialValue; + } + + /// + /// Resets the current checksum to zero + /// + public void Reset() { _current = 0; } + + /// + /// Gets the current checksum value + /// + public uint Value { get { return _current; } } + + /// + /// Updates the current checksum with part of an array of bytes + /// + /// The data to update the checksum with + ///
Where in data to start updating + /// The number of bytes from data to use + /// The sum of offset and count is larger than the length of data + /// data is a null reference + /// Offset or count is negative. + /// All the other Update methods are implemented in terms of this one. + /// This is therefore the only method a derived class has to implement + public abstract void Update(byte[] data, int offset, int count); + + /// + /// Updates the current checksum with an array of bytes. + /// + /// The data to update the checksum with + public void Update(byte[] data) + { + Update(data, 0, data.Length); + } + + /// + /// Updates the current checksum with the data from a string + /// + /// The string to update the checksum with + /// The characters in the string are converted by the UTF-8 encoding + public void Update(string data) + { + Update(Encoding.UTF8.GetBytes(data)); + } + + /// + /// Updates the current checksum with the data from a string, using a specific encoding + /// + /// The string to update the checksum with + /// The encoding to use + public void Update(string data, Encoding encoding) + { + Update(encoding.GetBytes(data)); + } + + } + #endregion + + #region CRC32 + /// + /// Implements a CRC32 checksum generator + /// + public sealed class CRC32Checksum : ChecksumGeneratorBase + { + #region DLL imports + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern uint crc32(uint crc, int data, uint length); + + #endregion + + /// + /// Initializes a new instance of the CRC32 checksum generator + /// + public CRC32Checksum() : base() {} + + /// + /// Initializes a new instance of the CRC32 checksum generator with a specified value + /// + /// The value to set the current checksum to + public CRC32Checksum(uint initialValue) : base(initialValue) {} + + /// + /// Updates the current checksum with part of an array of bytes + /// + /// The data to update the checksum with + /// Where in data to start updating + /// The
number of bytes from data to use + /// The sum of offset and count is larger than the length of data + /// data is a null reference + /// Offset or count is negative. + public override void Update(byte[] data, int offset, int count) + { + if (offset < 0 || count < 0) throw new ArgumentOutOfRangeException(); + if ((offset+count) > data.Length) throw new ArgumentException(); + GCHandle hData = GCHandle.Alloc(data, GCHandleType.Pinned); + try + { + _current = crc32(_current, hData.AddrOfPinnedObject().ToInt32()+offset, (uint)count); + } + finally + { + hData.Free(); + } + } + + } + #endregion + + #region Adler + /// + /// Implements a checksum generator that computes the Adler checksum on data + /// + public sealed class AdlerChecksum : ChecksumGeneratorBase + { + #region DLL imports + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern uint adler32(uint adler, int data, uint length); + + #endregion + + /// + /// Initializes a new instance of the Adler checksum generator + /// + public AdlerChecksum() : base() {} + + /// + /// Initializes a new instance of the Adler checksum generator with a specified value + /// + /// The value to set the current checksum to + public AdlerChecksum(uint initialValue) : base(initialValue) {} + + /// + /// Updates the current checksum with part of an array of bytes + /// + /// The data to update the checksum with + /// Where in data to start updating + /// The number of bytes from data to use + /// The sum of offset and count is larger than the length of data + /// data is a null reference + /// Offset or count is negative. 
+ public override void Update(byte[] data, int offset, int count) + { + if (offset < 0 || count < 0) throw new ArgumentOutOfRangeException(); + if ((offset+count) > data.Length) throw new ArgumentException(); + GCHandle hData = GCHandle.Alloc(data, GCHandleType.Pinned); + try + { + _current = adler32(_current, hData.AddrOfPinnedObject().ToInt32()+offset, (uint)count); + } + finally + { + hData.Free(); + } + } + + } + #endregion + +} \ No newline at end of file Added: external/zlib/contrib/dotzlib/DotZLib/CircularBuffer.cs ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib/CircularBuffer.cs Tue Jan 3 07:42:59 2006 @@ -0,0 +1,83 @@ +// +// © Copyright Henrik Ravn 2004 +// +// Use, modification and distribution are subject to the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +using System; +using System.Diagnostics; + +namespace DotZLib +{ + + /// + /// This class implements a circular buffer + /// + internal class CircularBuffer + { + #region Private data + private int _capacity; + private int _head; + private int _tail; + private int _size; + private byte[] _buffer; + #endregion + + public CircularBuffer(int capacity) + { + Debug.Assert( capacity > 0 ); + _buffer = new byte[capacity]; + _capacity = capacity; + _head = 0; + _tail = 0; + _size = 0; + } + + public int Size { get { return _size; } } + + public int Put(byte[] source, int offset, int count) + { + Debug.Assert( count > 0 ); + int trueCount = Math.Min(count, _capacity - Size); + for (int i = 0; i < trueCount; ++i) + _buffer[(_tail+i) % _capacity] = source[offset+i]; + _tail += trueCount; + _tail %= _capacity; + _size += trueCount; + return trueCount; + } + + public bool Put(byte b) + { + if (Size == _capacity) // no room + return false; + _buffer[_tail++] = b; + _tail %= _capacity; + ++_size; + return true; + } + + public int 
Get(byte[] destination, int offset, int count) + { + int trueCount = Math.Min(count,Size); + for (int i = 0; i < trueCount; ++i) + destination[offset + i] = _buffer[(_head+i) % _capacity]; + _head += trueCount; + _head %= _capacity; + _size -= trueCount; + return trueCount; + } + + public int Get() + { + if (Size == 0) + return -1; + + int result = (int)_buffer[_head++ % _capacity]; + --_size; + return result; + } + + } +} Added: external/zlib/contrib/dotzlib/DotZLib/CodecBase.cs ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib/CodecBase.cs Tue Jan 3 07:42:59 2006 @@ -0,0 +1,198 @@ +// +// © Copyright Henrik Ravn 2004 +// +// Use, modification and distribution are subject to the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +using System; +using System.Runtime.InteropServices; + +namespace DotZLib +{ + /// + /// Implements the common functionality needed for all s + /// + public abstract class CodecBase : Codec, IDisposable + { + + #region Data members + + /// + /// Instance of the internal zlib buffer structure that is + /// passed to all functions in the zlib dll + /// + internal ZStream _ztream = new ZStream(); + + /// + /// True if the object instance has been disposed, false otherwise + /// + protected bool _isDisposed = false; + + /// + /// The size of the internal buffers + /// + protected const int kBufferSize = 16384; + + private byte[] _outBuffer = new byte[kBufferSize]; + private byte[] _inBuffer = new byte[kBufferSize]; + + private GCHandle _hInput; + private GCHandle _hOutput; + + private uint _checksum = 0; + + #endregion + + /// + /// Initializes a new instance of the CodeBase class. 
+ /// + public CodecBase() + { + try + { + _hInput = GCHandle.Alloc(_inBuffer, GCHandleType.Pinned); + _hOutput = GCHandle.Alloc(_outBuffer, GCHandleType.Pinned); + } + catch (Exception) + { + CleanUp(false); + throw; + } + } + + + #region Codec Members + + /// + /// Occurs when more processed data are available. + /// + public event DataAvailableHandler DataAvailable; + + /// + /// Fires the event + /// + protected void OnDataAvailable() + { + if (_ztream.total_out > 0) + { + if (DataAvailable != null) + DataAvailable( _outBuffer, 0, (int)_ztream.total_out); + resetOutput(); + } + } + + /// + /// Adds more data to the codec to be processed. + /// + /// Byte array containing the data to be added to the codec + /// Adding data may, or may not, raise the DataAvailable event + public void Add(byte[] data) + { + Add(data,0,data.Length); + } + + /// + /// Adds more data to the codec to be processed. + /// + /// Byte array containing the data to be added to the codec + /// The index of the first byte to add from data + /// The number of bytes to add + /// Adding data may, or may not, raise the DataAvailable event + /// This must be implemented by a derived class + public abstract void Add(byte[] data, int offset, int count); + + /// + /// Finishes up any pending data that needs to be processed and handled. 
+ /// + /// This must be implemented by a derived class + public abstract void Finish(); + + /// + /// Gets the checksum of the data that has been added so far + /// + public uint Checksum { get { return _checksum; } } + + #endregion + + #region Destructor & IDisposable stuff + + /// + /// Destroys this instance + /// + ~CodecBase() + { + CleanUp(false); + } + + /// + /// Releases any unmanaged resources and calls the CleanUp method of the derived class + /// + public void Dispose() + { + CleanUp(true); + } + + /// + /// Performs any codec specific cleanup + /// + /// This must be implemented by a derived class + protected abstract void CleanUp(); + + // performs the release of the handles and calls the derived CleanUp() + private void CleanUp(bool isDisposing) + { + if (!_isDisposed) + { + CleanUp(); + if (_hInput.IsAllocated) + _hInput.Free(); + if (_hOutput.IsAllocated) + _hOutput.Free(); + + _isDisposed = true; + } + } + + + #endregion + + #region Helper methods + + /// + /// Copies a number of bytes to the internal codec buffer - ready for processing + /// + /// The byte array that contains the data to copy + /// The index of the first byte to copy + /// The number of bytes to copy from data + protected void copyInput(byte[] data, int startIndex, int count) + { + Array.Copy(data, startIndex, _inBuffer,0, count); + _ztream.next_in = _hInput.AddrOfPinnedObject(); + _ztream.total_in = 0; + _ztream.avail_in = (uint)count; + + } + + /// + /// Resets the internal output buffers to a known state - ready for processing + /// + protected void resetOutput() + { + _ztream.total_out = 0; + _ztream.avail_out = kBufferSize; + _ztream.next_out = _hOutput.AddrOfPinnedObject(); + } + + /// + /// Updates the running checksum property + /// + /// The new checksum value + protected void setChecksum(uint newSum) + { + _checksum = newSum; + } + #endregion + + } +} Added: external/zlib/contrib/dotzlib/DotZLib/Deflater.cs
============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib/Deflater.cs Tue Jan 3 07:42:59 2006 @@ -0,0 +1,106 @@ +// +// © Copyright Henrik Ravn 2004 +// +// Use, modification and distribution are subject to the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +using System; +using System.Diagnostics; +using System.Runtime.InteropServices; + +namespace DotZLib +{ + + /// + /// Implements a data compressor, using the deflate algorithm in the ZLib dll + /// + public sealed class Deflater : CodecBase + { + #region Dll imports + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl, CharSet=CharSet.Ansi)] + private static extern int deflateInit_(ref ZStream sz, int level, string vs, int size); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern int deflate(ref ZStream sz, int flush); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern int deflateReset(ref ZStream sz); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern int deflateEnd(ref ZStream sz); + #endregion + + /// + /// Constructs an new instance of the Deflater + /// + /// The compression level to use for this Deflater + public Deflater(CompressLevel level) : base() + { + int retval = deflateInit_(ref _ztream, (int)level, Info.Version, Marshal.SizeOf(_ztream)); + if (retval != 0) + throw new ZLibException(retval, "Could not initialize deflater"); + + resetOutput(); + } + + /// + /// Adds more data to the codec to be processed. 
+ /// + /// Byte array containing the data to be added to the codec + /// The index of the first byte to add from data + /// The number of bytes to add + /// Adding data may, or may not, raise the DataAvailable event + public override void Add(byte[] data, int offset, int count) + { + if (data == null) throw new ArgumentNullException(); + if (offset < 0 || count < 0) throw new ArgumentOutOfRangeException(); + if ((offset+count) > data.Length) throw new ArgumentException(); + + int total = count; + int inputIndex = offset; + int err = 0; + + while (err >= 0 && inputIndex < total) + { + copyInput(data, inputIndex, Math.Min(total - inputIndex, kBufferSize)); + while (err >= 0 && _ztream.avail_in > 0) + { + err = deflate(ref _ztream, (int)FlushTypes.None); + if (err == 0) + while (_ztream.avail_out == 0) + { + OnDataAvailable(); + err = deflate(ref _ztream, (int)FlushTypes.None); + } + inputIndex += (int)_ztream.total_in; + } + } + setChecksum( _ztream.adler ); + } + + + /// + /// Finishes up any pending data that needs to be processed and handled. + /// + public override void Finish() + { + int err; + do + { + err = deflate(ref _ztream, (int)FlushTypes.Finish); + OnDataAvailable(); + } + while (err == 0); + setChecksum( _ztream.adler ); + deflateReset(ref _ztream); + resetOutput(); + } + + /// + /// Closes the internal zlib deflate stream + /// + protected override void CleanUp() { deflateEnd(ref _ztream); } + + } +} Added: external/zlib/contrib/dotzlib/DotZLib/DotZLib.cs ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib/DotZLib.cs Tue Jan 3 07:42:59 2006 @@ -0,0 +1,288 @@ +// +// © Copyright Henrik Ravn 2004 +// +// Use, modification and distribution are subject to the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +using System; +using System.IO; +using System.Runtime.InteropServices; +using System.Text; + + +namespace DotZLib +{ + + #region Internal types + + /// + /// Defines constants for the various flush types used with zlib + /// + internal enum FlushTypes + { + None, Partial, Sync, Full, Finish, Block + } + + #region ZStream structure + // internal mapping of the zlib zstream structure for marshalling + [StructLayoutAttribute(LayoutKind.Sequential, Pack=4, Size=0, CharSet=CharSet.Ansi)] + internal struct ZStream + { + public IntPtr next_in; + public uint avail_in; + public uint total_in; + + public IntPtr next_out; + public uint avail_out; + public uint total_out; + + [MarshalAs(UnmanagedType.LPStr)] + string msg; + uint state; + + uint zalloc; + uint zfree; + uint opaque; + + int data_type; + public uint adler; + uint reserved; + } + + #endregion + + #endregion + + #region Public enums + /// + /// Defines constants for the available compression levels in zlib + /// + public enum CompressLevel : int + { + /// + /// The default compression level with a reasonable compromise between compression and speed + /// + Default = -1, + /// + /// No compression at all. The data are passed straight through. + /// + None = 0, + /// + /// The maximum compression rate available. + /// + Best = 9, + /// + /// The fastest available compression level. 
+ /// + Fastest = 1 + } + #endregion + + #region Exception classes + /// + /// The exception that is thrown when an error occurs on the zlib dll + /// + public class ZLibException : ApplicationException + { + /// + /// Initializes a new instance of the class with a specified + /// error message and error code + /// + /// The zlib error code that caused the exception + /// A message that (hopefully) describes the error + public ZLibException(int errorCode, string msg) : base(String.Format("ZLib error {0} {1}", errorCode, msg)) + { + } + + /// + /// Initializes a new instance of the class with a specified + /// error code + /// + /// The zlib error code that caused the exception + public ZLibException(int errorCode) : base(String.Format("ZLib error {0}", errorCode)) + { + } + } + #endregion + + #region Interfaces + + /// + /// Declares methods and properties that enables a running checksum to be calculated + /// + public interface ChecksumGenerator + { + /// + /// Gets the current value of the checksum + /// + uint Value { get; } + + /// + /// Clears the current checksum to 0 + /// + void Reset(); + + /// + /// Updates the current checksum with an array of bytes + /// + /// The data to update the checksum with + void Update(byte[] data); + + /// + /// Updates the current checksum with part of an array of bytes + /// + /// The data to update the checksum with + /// Where in data to start updating + /// The number of bytes from data to use + /// The sum of offset and count is larger than the length of data + /// data is a null reference + /// Offset or count is negative. 
+ void Update(byte[] data, int offset, int count); + + /// + /// Updates the current checksum with the data from a string + /// + /// The string to update the checksum with + /// The characters in the string are converted by the UTF-8 encoding + void Update(string data); + + /// + /// Updates the current checksum with the data from a string, using a specific encoding + /// + /// The string to update the checksum with + /// The encoding to use + void Update(string data, Encoding encoding); + } + + + /// + /// Represents the method that will be called from a codec when new data + /// are available. + /// + /// The byte array containing the processed data + /// The index of the first processed byte in data + /// The number of processed bytes available + /// On return from this method, the data may be overwritten, so grab it while you can. + /// You cannot assume that startIndex will be zero. + /// + public delegate void DataAvailableHandler(byte[] data, int startIndex, int count); + + /// + /// Declares methods and events for implementing compressors/decompressors + /// + public interface Codec + { + /// + /// Occurs when more processed data are available. + /// + event DataAvailableHandler DataAvailable; + + /// + /// Adds more data to the codec to be processed. + /// + /// Byte array containing the data to be added to the codec + /// Adding data may, or may not, raise the DataAvailable event + void Add(byte[] data); + + /// + /// Adds more data to the codec to be processed. + /// + /// Byte array containing the data to be added to the codec + /// The index of the first byte to add from data + /// The number of bytes to add + /// Adding data may, or may not, raise the DataAvailable event + void Add(byte[] data, int offset, int count); + + /// + /// Finishes up any pending data that needs to be processed and handled. 
+ /// + void Finish(); + + /// + /// Gets the checksum of the data that has been added so far + /// + uint Checksum { get; } + + + } + + #endregion + + #region Classes + /// + /// Encapsulates general information about the ZLib library + /// + public class Info + { + #region DLL imports + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern uint zlibCompileFlags(); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern string zlibVersion(); + #endregion + + #region Private stuff + private uint _flags; + + // helper function that unpacks a bitsize mask + private static int bitSize(uint bits) + { + switch (bits) + { + case 0: return 16; + case 1: return 32; + case 2: return 64; + } + return -1; + } + #endregion + + /// + /// Constructs an instance of the Info class. + /// + public Info() + { + _flags = zlibCompileFlags(); + } + + /// + /// True if the library is compiled with debug info + /// + public bool HasDebugInfo { get { return 0 != (_flags & 0x100); } } + + /// + /// True if the library is compiled with assembly optimizations + /// + public bool UsesAssemblyCode { get { return 0 != (_flags & 0x200); } } + + /// + /// Gets the size of the unsigned int that was compiled into Zlib + /// + public int SizeOfUInt { get { return bitSize(_flags & 3); } } + + /// + /// Gets the size of the unsigned long that was compiled into Zlib + /// + public int SizeOfULong { get { return bitSize((_flags >> 2) & 3); } } + + /// + /// Gets the size of the pointers that were compiled into Zlib + /// + public int SizeOfPointer { get { return bitSize((_flags >> 4) & 3); } } + + /// + /// Gets the size of the z_off_t type that was compiled into Zlib + /// + public int SizeOfOffset { get { return bitSize((_flags >> 6) & 3); } } + + /// + /// Gets the version of ZLib as a string, e.g. 
"1.2.1" + /// + public static string Version { get { return zlibVersion(); } } + } + + #endregion + +} Added: external/zlib/contrib/dotzlib/DotZLib/DotZLib.csproj ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib/DotZLib.csproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,141 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: external/zlib/contrib/dotzlib/DotZLib/GZipStream.cs ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib/GZipStream.cs Tue Jan 3 07:42:59 2006 @@ -0,0 +1,301 @@ +// +// © Copyright Henrik Ravn 2004 +// +// Use, modification and distribution are subject to the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +using System; +using System.IO; +using System.Runtime.InteropServices; + +namespace DotZLib +{ + /// + /// Implements a compressed , in GZip (.gz) format. 
+ /// + public class GZipStream : Stream, IDisposable + { + #region Dll Imports + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl, CharSet=CharSet.Ansi)] + private static extern IntPtr gzopen(string name, string mode); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern int gzclose(IntPtr gzFile); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern int gzwrite(IntPtr gzFile, int data, int length); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern int gzread(IntPtr gzFile, int data, int length); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern int gzgetc(IntPtr gzFile); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern int gzputc(IntPtr gzFile, int c); + + #endregion + + #region Private data + private IntPtr _gzFile; + private bool _isDisposed = false; + private bool _isWriting; + #endregion + + #region Constructors + /// + /// Creates a new file as a writeable GZipStream + /// + /// The name of the compressed file to create + /// The compression level to use when adding data + /// If an error occurred in the internal zlib function + public GZipStream(string fileName, CompressLevel level) + { + _isWriting = true; + _gzFile = gzopen(fileName, String.Format("wb{0}", (int)level)); + if (_gzFile == IntPtr.Zero) + throw new ZLibException(-1, "Could not open " + fileName); + } + + /// + /// Opens an existing file as a readable GZipStream + /// + /// The name of the file to open + /// If an error occurred in the internal zlib function + public GZipStream(string fileName) + { + _isWriting = false; + _gzFile = gzopen(fileName, "rb"); + if (_gzFile == IntPtr.Zero) + throw new ZLibException(-1, "Could not open " + fileName); + + } + #endregion + + #region Access properties + /// + /// Returns true of this stream can be read from, false 
otherwise + /// + public override bool CanRead + { + get + { + return !_isWriting; + } + } + + + /// + /// Returns false. + /// + public override bool CanSeek + { + get + { + return false; + } + } + + /// + /// Returns true if this stream is writeable, false otherwise + /// + public override bool CanWrite + { + get + { + return _isWriting; + } + } + #endregion + + #region Destructor & IDispose stuff + + /// + /// Destroys this instance + /// + ~GZipStream() + { + cleanUp(false); + } + + /// + /// Closes the external file handle + /// + public void Dispose() + { + cleanUp(true); + } + + // Does the actual closing of the file handle. + private void cleanUp(bool isDisposing) + { + if (!_isDisposed) + { + gzclose(_gzFile); + _isDisposed = true; + } + } + #endregion + + #region Basic reading and writing + /// + /// Attempts to read a number of bytes from the stream. + /// + /// The destination data buffer + /// The index of the first destination byte in buffer + /// The number of bytes requested + /// The number of bytes read + /// If buffer is null + /// If count or offset are negative + /// If offset + count is > buffer.Length + /// If this stream is not readable. + /// If this stream has been disposed. + public override int Read(byte[] buffer, int offset, int count) + { + if (!CanRead) throw new NotSupportedException(); + if (buffer == null) throw new ArgumentNullException(); + if (offset < 0 || count < 0) throw new ArgumentOutOfRangeException(); + if ((offset+count) > buffer.Length) throw new ArgumentException(); + if (_isDisposed) throw new ObjectDisposedException("GZipStream"); + + GCHandle h = GCHandle.Alloc(buffer, GCHandleType.Pinned); + int result; + try + { + result = gzread(_gzFile, h.AddrOfPinnedObject().ToInt32() + offset, count); + if (result < 0) + throw new IOException(); + } + finally + { + h.Free(); + } + return result; + } + + /// + /// Attempts to read a single byte from the stream.
+ /// + /// The byte that was read, or -1 in case of error or End-Of-File + public override int ReadByte() + { + if (!CanRead) throw new NotSupportedException(); + if (_isDisposed) throw new ObjectDisposedException("GZipStream"); + return gzgetc(_gzFile); + } + + /// + /// Writes a number of bytes to the stream + /// + /// + /// + /// + /// If buffer is null + /// If count or offset are negative + /// If offset + count is > buffer.Length + /// If this stream is not writeable. + /// If this stream has been disposed. + public override void Write(byte[] buffer, int offset, int count) + { + if (!CanWrite) throw new NotSupportedException(); + if (buffer == null) throw new ArgumentNullException(); + if (offset < 0 || count < 0) throw new ArgumentOutOfRangeException(); + if ((offset+count) > buffer.Length) throw new ArgumentException(); + if (_isDisposed) throw new ObjectDisposedException("GZipStream"); + + GCHandle h = GCHandle.Alloc(buffer, GCHandleType.Pinned); + try + { + int result = gzwrite(_gzFile, h.AddrOfPinnedObject().ToInt32() + offset, count); + if (result < 0) + throw new IOException(); + } + finally + { + h.Free(); + } + } + + /// + /// Writes a single byte to the stream + /// + /// The byte to add to the stream. + /// If this stream is not writeable. + /// If this stream has been disposed. + public override void WriteByte(byte value) + { + if (!CanWrite) throw new NotSupportedException(); + if (_isDisposed) throw new ObjectDisposedException("GZipStream"); + + int result = gzputc(_gzFile, (int)value); + if (result < 0) + throw new IOException(); + } + #endregion + + #region Position & length stuff + /// + /// Not supported. + /// + /// + /// Always thrown + public override void SetLength(long value) + { + throw new NotSupportedException(); + } + + /// + /// Not supported. + /// + /// + /// + /// + /// Always thrown + public override long Seek(long offset, SeekOrigin origin) + { + throw new NotSupportedException(); + } + + /// + /// Flushes the GZipStream.
+ /// + /// In this implementation, this method does nothing. This is because excessive + /// flushing may degrade the achievable compression rates. + public override void Flush() + { + // left empty on purpose + } + + /// + /// Gets/sets the current position in the GZipStream. Not supported. + /// + /// In this implementation this property is not supported + /// Always thrown + public override long Position + { + get + { + throw new NotSupportedException(); + } + set + { + throw new NotSupportedException(); + } + } + + /// + /// Gets the size of the stream. Not supported. + /// + /// In this implementation this property is not supported + /// Always thrown + public override long Length + { + get + { + throw new NotSupportedException(); + } + } + #endregion + } +} Added: external/zlib/contrib/dotzlib/DotZLib/Inflater.cs ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib/Inflater.cs Tue Jan 3 07:42:59 2006 @@ -0,0 +1,105 @@ +// +// © Copyright Henrik Ravn 2004 +// +// Use, modification and distribution are subject to the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +using System; +using System.Diagnostics; +using System.Runtime.InteropServices; + +namespace DotZLib +{ + + /// + /// Implements a data decompressor, using the inflate algorithm in the ZLib dll + /// + public class Inflater : CodecBase + { + #region Dll imports + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl, CharSet=CharSet.Ansi)] + private static extern int inflateInit_(ref ZStream sz, string vs, int size); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern int inflate(ref ZStream sz, int flush); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern int inflateReset(ref ZStream sz); + + [DllImport("ZLIB1.dll", CallingConvention=CallingConvention.Cdecl)] + private static extern int inflateEnd(ref ZStream sz); + #endregion + + /// + /// Constructs an new instance of the Inflater + /// + public Inflater() : base() + { + int retval = inflateInit_(ref _ztream, Info.Version, Marshal.SizeOf(_ztream)); + if (retval != 0) + throw new ZLibException(retval, "Could not initialize inflater"); + + resetOutput(); + } + + + /// + /// Adds more data to the codec to be processed. 
+ /// + /// Byte array containing the data to be added to the codec + /// The index of the first byte to add from data + /// The number of bytes to add + /// Adding data may, or may not, raise the DataAvailable event + public override void Add(byte[] data, int offset, int count) + { + if (data == null) throw new ArgumentNullException(); + if (offset < 0 || count < 0) throw new ArgumentOutOfRangeException(); + if ((offset+count) > data.Length) throw new ArgumentException(); + + int total = count; + int inputIndex = offset; + int err = 0; + + while (err >= 0 && inputIndex < total) + { + copyInput(data, inputIndex, Math.Min(total - inputIndex, kBufferSize)); + err = inflate(ref _ztream, (int)FlushTypes.None); + if (err == 0) + while (_ztream.avail_out == 0) + { + OnDataAvailable(); + err = inflate(ref _ztream, (int)FlushTypes.None); + } + + inputIndex += (int)_ztream.total_in; + } + setChecksum( _ztream.adler ); + } + + + /// + /// Finishes up any pending data that needs to be processed and handled. + /// + public override void Finish() + { + int err; + do + { + err = inflate(ref _ztream, (int)FlushTypes.Finish); + OnDataAvailable(); + } + while (err == 0); + setChecksum( _ztream.adler ); + inflateReset(ref _ztream); + resetOutput(); + } + + /// + /// Closes the internal zlib inflate stream + /// + protected override void CleanUp() { inflateEnd(ref _ztream); } + + + } +} Added: external/zlib/contrib/dotzlib/DotZLib/UnitTests.cs ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/DotZLib/UnitTests.cs Tue Jan 3 07:42:59 2006 @@ -0,0 +1,274 @@ +// +// © Copyright Henrik Ravn 2004 +// +// Use, modification and distribution are subject to the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +using System; +using System.Collections; +using System.IO; + +// uncomment the define below to include unit tests +//#define nunit +#if nunit +using NUnit.Framework; + +// Unit tests for the DotZLib class library +// ---------------------------------------- +// +// Use this with NUnit 2 from http://www.nunit.org +// + +namespace DotZLibTests +{ + using DotZLib; + + // helper methods + internal class Utils + { + public static bool byteArrEqual( byte[] lhs, byte[] rhs ) + { + if (lhs.Length != rhs.Length) + return false; + for (int i = lhs.Length-1; i >= 0; --i) + if (lhs[i] != rhs[i]) + return false; + return true; + } + + } + + + [TestFixture] + public class CircBufferTests + { + #region Circular buffer tests + [Test] + public void SinglePutGet() + { + CircularBuffer buf = new CircularBuffer(10); + Assert.AreEqual( 0, buf.Size ); + Assert.AreEqual( -1, buf.Get() ); + + Assert.IsTrue(buf.Put( 1 )); + Assert.AreEqual( 1, buf.Size ); + Assert.AreEqual( 1, buf.Get() ); + Assert.AreEqual( 0, buf.Size ); + Assert.AreEqual( -1, buf.Get() ); + } + + [Test] + public void BlockPutGet() + { + CircularBuffer buf = new CircularBuffer(10); + byte[] arr = {1,2,3,4,5,6,7,8,9,10}; + Assert.AreEqual( 10, buf.Put(arr,0,10) ); + Assert.AreEqual( 10, buf.Size ); + Assert.IsFalse( buf.Put(11) ); + Assert.AreEqual( 1, buf.Get() ); + Assert.IsTrue( buf.Put(11) ); + + byte[] arr2 = (byte[])arr.Clone(); + Assert.AreEqual( 9, buf.Get(arr2,1,9) ); + Assert.IsTrue( Utils.byteArrEqual(arr,arr2) ); + } + + #endregion + } + + [TestFixture] + public class ChecksumTests + { + #region CRC32 Tests + [Test] + public void CRC32_Null() + { + CRC32Checksum crc32 = new CRC32Checksum(); + Assert.AreEqual( 0, crc32.Value ); + + crc32 = new CRC32Checksum(1); + Assert.AreEqual( 1, crc32.Value ); + + crc32 = new CRC32Checksum(556); + Assert.AreEqual( 556, crc32.Value ); + } + + [Test] + public void CRC32_Data() + 
{ + CRC32Checksum crc32 = new CRC32Checksum(); + byte[] data = { 1,2,3,4,5,6,7 }; + crc32.Update(data); + Assert.AreEqual( 0x70e46888, crc32.Value ); + + crc32 = new CRC32Checksum(); + crc32.Update("penguin"); + Assert.AreEqual( 0x0e5c1a120, crc32.Value ); + + crc32 = new CRC32Checksum(1); + crc32.Update("penguin"); + Assert.AreEqual(0x43b6aa94, crc32.Value); + + } + #endregion + + #region Adler tests + + [Test] + public void Adler_Null() + { + AdlerChecksum adler = new AdlerChecksum(); + Assert.AreEqual(0, adler.Value); + + adler = new AdlerChecksum(1); + Assert.AreEqual( 1, adler.Value ); + + adler = new AdlerChecksum(556); + Assert.AreEqual( 556, adler.Value ); + } + + [Test] + public void Adler_Data() + { + AdlerChecksum adler = new AdlerChecksum(1); + byte[] data = { 1,2,3,4,5,6,7 }; + adler.Update(data); + Assert.AreEqual( 0x5b001d, adler.Value ); + + adler = new AdlerChecksum(); + adler.Update("penguin"); + Assert.AreEqual(0x0bcf02f6, adler.Value ); + + adler = new AdlerChecksum(1); + adler.Update("penguin"); + Assert.AreEqual(0x0bd602f7, adler.Value); + + } + #endregion + } + + [TestFixture] + public class InfoTests + { + #region Info tests + [Test] + public void Info_Version() + { + Info info = new Info(); + Assert.AreEqual("1.2.3", Info.Version); + Assert.AreEqual(32, info.SizeOfUInt); + Assert.AreEqual(32, info.SizeOfULong); + Assert.AreEqual(32, info.SizeOfPointer); + Assert.AreEqual(32, info.SizeOfOffset); + } + #endregion + } + + [TestFixture] + public class DeflateInflateTests + { + #region Deflate tests + [Test] + public void Deflate_Init() + { + using (Deflater def = new Deflater(CompressLevel.Default)) + { + } + } + + private ArrayList compressedData = new ArrayList(); + private uint adler1; + + private ArrayList uncompressedData = new ArrayList(); + private uint adler2; + + public void CDataAvail(byte[] data, int startIndex, int count) + { + for (int i = 0; i < count; ++i) + compressedData.Add(data[i+startIndex]); + } + + [Test] + public void 
Deflate_Compress() + { + compressedData.Clear(); + + byte[] testData = new byte[35000]; + for (int i = 0; i < testData.Length; ++i) + testData[i] = 5; + + using (Deflater def = new Deflater((CompressLevel)5)) + { + def.DataAvailable += new DataAvailableHandler(CDataAvail); + def.Add(testData); + def.Finish(); + adler1 = def.Checksum; + } + } + #endregion + + #region Inflate tests + [Test] + public void Inflate_Init() + { + using (Inflater inf = new Inflater()) + { + } + } + + private void DDataAvail(byte[] data, int startIndex, int count) + { + for (int i = 0; i < count; ++i) + uncompressedData.Add(data[i+startIndex]); + } + + [Test] + public void Inflate_Expand() + { + uncompressedData.Clear(); + + using (Inflater inf = new Inflater()) + { + inf.DataAvailable += new DataAvailableHandler(DDataAvail); + inf.Add((byte[])compressedData.ToArray(typeof(byte))); + inf.Finish(); + adler2 = inf.Checksum; + } + Assert.AreEqual( adler1, adler2 ); + } + #endregion + } + + [TestFixture] + public class GZipStreamTests + { + #region GZipStream test + [Test] + public void GZipStream_WriteRead() + { + using (GZipStream gzOut = new GZipStream("gzstream.gz", CompressLevel.Best)) + { + BinaryWriter writer = new BinaryWriter(gzOut); + writer.Write("hi there"); + writer.Write(Math.PI); + writer.Write(42); + } + + using (GZipStream gzIn = new GZipStream("gzstream.gz")) + { + BinaryReader reader = new BinaryReader(gzIn); + string s = reader.ReadString(); + Assert.AreEqual("hi there",s); + double d = reader.ReadDouble(); + Assert.AreEqual(Math.PI, d); + int i = reader.ReadInt32(); + Assert.AreEqual(42,i); + } + + } + #endregion + } +} + +#endif \ No newline at end of file Added: external/zlib/contrib/dotzlib/LICENSE_1_0.txt ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/LICENSE_1_0.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby 
granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. \ No newline at end of file Added: external/zlib/contrib/dotzlib/readme.txt ============================================================================== --- (empty file) +++ external/zlib/contrib/dotzlib/readme.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,58 @@ +This directory contains a .Net wrapper class library for the ZLib1.dll + +The wrapper includes support for inflating/deflating memory buffers, +.Net streaming wrappers for the gz streams part of zlib, and wrappers +for the checksum parts of zlib. See DotZLib/UnitTests.cs for examples. + +Directory structure: +-------------------- + +LICENSE_1_0.txt - License file. +readme.txt - This file. 
+DotZLib.chm - Class library documentation +DotZLib.build - NAnt build file +DotZLib.sln - Microsoft Visual Studio 2003 solution file + +DotZLib\*.cs - Source files for the class library + +Unit tests: +----------- +The file DotZLib/UnitTests.cs contains unit tests for use with NUnit 2.1 or higher. +To include unit tests in the build, define nunit before building. + + +Build instructions: +------------------- + +1. Using Visual Studio.Net 2003: + Open DotZLib.sln in VS.Net and build from there. Output file (DotZLib.dll) + will be found in either ./DotZLib/bin/release or ./DotZLib/bin/debug, depending on + whether you are building the release or debug version of the library. Check + DotZLib/UnitTests.cs for instructions on how to include unit tests in the + build. + +2. Using NAnt: + Open a command prompt with access to the build environment and run nant + in the same directory as the DotZLib.build file. + You can define 2 properties on the nant command-line to control the build: + debug={true|false} to toggle between release/debug builds (default=true). + nunit={true|false} to include or exclude unit tests (default=true). + Also the target clean will remove binaries. + Output file (DotZLib.dll) will be found in either ./DotZLib/bin/release + or ./DotZLib/bin/debug, depending on whether you are building the release + or debug version of the library. + + Examples: + nant -D:debug=false -D:nunit=false + will build a release mode version of the library without unit tests. + nant + will build a debug version of the library with unit tests. + nant clean + will remove all previously built files. + + +--------------------------------- +Copyright (c) Henrik Ravn 2004 + +Use, modification and distribution are subject to the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Added: external/zlib/contrib/infback9/README ============================================================================== --- (empty file) +++ external/zlib/contrib/infback9/README Tue Jan 3 07:42:59 2006 @@ -0,0 +1 @@ +See infback9.h for what this is and how to use it. Added: external/zlib/contrib/infback9/infback9.c ============================================================================== --- (empty file) +++ external/zlib/contrib/infback9/infback9.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,608 @@ +/* infback9.c -- inflate deflate64 data using a call-back interface + * Copyright (C) 1995-2003 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "infback9.h" +#include "inftree9.h" +#include "inflate9.h" + +#define WSIZE 65536UL + +/* + strm provides memory allocation functions in zalloc and zfree, or + Z_NULL to use the library memory allocation functions. + + window is a user-supplied window and output buffer that is 64K bytes. 
+ */ +int ZEXPORT inflateBack9Init_(strm, window, version, stream_size) +z_stream FAR *strm; +unsigned char FAR *window; +const char *version; +int stream_size; +{ + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL || window == Z_NULL) + return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; + } + if (strm->zfree == (free_func)0) strm->zfree = zcfree; + state = (struct inflate_state FAR *)ZALLOC(strm, 1, + sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (voidpf)state; + state->window = window; + return Z_OK; +} + +/* + Build and output length and distance decoding tables for fixed code + decoding. + */ +#ifdef MAKEFIXED +#include + +void makefixed9(void) +{ + unsigned sym, bits, low, size; + code *next, *lenfix, *distfix; + struct inflate_state state; + code fixed[544]; + + /* literal/length table */ + sym = 0; + while (sym < 144) state.lens[sym++] = 8; + while (sym < 256) state.lens[sym++] = 9; + while (sym < 280) state.lens[sym++] = 7; + while (sym < 288) state.lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table9(LENS, state.lens, 288, &(next), &(bits), state.work); + + /* distance table */ + sym = 0; + while (sym < 32) state.lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table9(DISTS, state.lens, 32, &(next), &(bits), state.work); + + /* write tables */ + puts(" /* inffix9.h -- table for decoding deflate64 fixed codes"); + puts(" * Generated automatically by makefixed9()."); + puts(" */"); + puts(""); + puts(" /* WARNING: this file should *not* be used by applications."); + puts(" It is part of the implementation of this library and is"); + puts(" subject to change. 
Applications should only use zlib.h."); + puts(" */"); + puts(""); + size = 1U << 9; + printf(" static const code lenfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 6) == 0) printf("\n "); + printf("{%u,%u,%d}", lenfix[low].op, lenfix[low].bits, + lenfix[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); + size = 1U << 5; + printf("\n static const code distfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 5) == 0) printf("\n "); + printf("{%u,%u,%d}", distfix[low].op, distfix[low].bits, + distfix[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); +} +#endif /* MAKEFIXED */ + +/* Macros for inflateBack(): */ + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Assure that some input is available. If input is requested, but denied, + then return a Z_BUF_ERROR from inflateBack(). */ +#define PULL() \ + do { \ + if (have == 0) { \ + have = in(in_desc, &next); \ + if (have == 0) { \ + next = Z_NULL; \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflateBack() + with an error if there is no input available. */ +#define PULLBYTE() \ + do { \ + PULL(); \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflateBack() with + an error. 
*/ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n <= 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* Assure that some output space is available, by writing out the window + if it's full. If the write fails, return from inflateBack() with a + Z_BUF_ERROR. */ +#define ROOM() \ + do { \ + if (left == 0) { \ + put = window; \ + left = WSIZE; \ + wrap = 1; \ + if (out(out_desc, put, (unsigned)left)) { \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* + strm provides the memory allocation functions and window buffer on input, + and provides information on the unused input on return. For Z_DATA_ERROR + returns, strm will also provide an error message. + + in() and out() are the call-back input and output functions. When + inflateBack() needs more input, it calls in(). When inflateBack() has + filled the window with output, or when it completes with data in the + window, it calls out() to write out the data. The application must not + change the provided input until in() is called again or inflateBack() + returns. The application must not change the window/output buffer until + inflateBack() returns. + + in() and out() are called with a descriptor parameter provided in the + inflateBack() call. This parameter can be a structure that provides the + information required to do the read or write, as well as accumulated + information on the input and output such as totals and check values. + + in() should return zero on failure. out() should return non-zero on + failure. 
If either in() or out() fails, then inflateBack() returns a + Z_BUF_ERROR. strm->next_in can be checked for Z_NULL to see whether it + was in() or out() that caused the error. Otherwise, inflateBack() + returns Z_STREAM_END on success, Z_DATA_ERROR for a deflate format + error, or Z_MEM_ERROR if it could not allocate memory for the state. + inflateBack() can also return Z_STREAM_ERROR if the input parameters + are not correct, i.e. strm is Z_NULL or the state was not initialized. + */ +int ZEXPORT inflateBack9(strm, in, in_desc, out, out_desc) +z_stream FAR *strm; +in_func in; +void FAR *in_desc; +out_func out; +void FAR *out_desc; +{ + struct inflate_state FAR *state; + unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have; /* available input */ + unsigned long left; /* available output */ + inflate_mode mode; /* current inflate mode */ + int lastblock; /* true if processing last block */ + int wrap; /* true if the window has wrapped */ + unsigned long write; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned extra; /* extra bits needed */ + unsigned long length; /* literal or length of data to copy */ + unsigned long offset; /* distance back to copy string from */ + unsigned long copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + code this; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ + static const unsigned short order[19] = /* permutation of code lengths 
*/ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; +#include "inffix9.h" + + /* Check that the strm exists and that the state was initialized */ + if (strm == Z_NULL || strm->state == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* Reset the state */ + strm->msg = Z_NULL; + mode = TYPE; + lastblock = 0; + write = 0; + wrap = 0; + window = state->window; + next = strm->next_in; + have = next != Z_NULL ? strm->avail_in : 0; + hold = 0; + bits = 0; + put = window; + left = WSIZE; + lencode = Z_NULL; + distcode = Z_NULL; + + /* Inflate until end of block marked as last */ + for (;;) + switch (mode) { + case TYPE: + /* determine and dispatch block type */ + if (lastblock) { + BYTEBITS(); + mode = DONE; + break; + } + NEEDBITS(3); + lastblock = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + lastblock ? " (last)" : "")); + mode = STORED; + break; + case 1: /* fixed block */ + lencode = lenfix; + lenbits = 9; + distcode = distfix; + distbits = 5; + Tracev((stderr, "inflate: fixed codes block%s\n", + lastblock ? " (last)" : "")); + mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + lastblock ? 
" (last)" : "")); + mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + mode = BAD; + } + DROPBITS(2); + break; + + case STORED: + /* get and verify stored block length */ + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + mode = BAD; + break; + } + length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %lu\n", + length)); + INITBITS(); + + /* copy stored block from input to output */ + while (length != 0) { + copy = length; + PULL(); + ROOM(); + if (copy > have) copy = have; + if (copy > left) copy = left; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + length -= copy; + } + Tracev((stderr, "inflate: stored end\n")); + mode = TYPE; + break; + + case TABLE: + /* get dynamic table entries descriptor */ + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); + if (state->nlen > 286) { + strm->msg = (char *)"too many length symbols"; + mode = BAD; + break; + } + Tracev((stderr, "inflate: table sizes ok\n")); + + /* get code length code lengths (not a typo) */ + state->have = 0; + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + lencode = (code const FAR *)(state->next); + lenbits = 7; + ret = inflate_table9(CODES, state->lens, 19, &(state->next), + &(lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + + /* get length and distance code code lengths */ + state->have = 0; + while (state->have < state->nlen + state->ndist) { + for (;;) { + this = lencode[BITS(lenbits)]; + if 
((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.val < 16) { + NEEDBITS(this.bits); + DROPBITS(this.bits); + state->lens[state->have++] = this.val; + } + else { + if (this.val == 16) { + NEEDBITS(this.bits + 2); + DROPBITS(this.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + mode = BAD; + break; + } + len = (unsigned)(state->lens[state->have - 1]); + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (this.val == 17) { + NEEDBITS(this.bits + 3); + DROPBITS(this.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(this.bits + 7); + DROPBITS(this.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (mode == BAD) break; + + /* build code tables */ + state->next = state->codes; + lencode = (code const FAR *)(state->next); + lenbits = 9; + ret = inflate_table9(LENS, state->lens, state->nlen, + &(state->next), &(lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + mode = BAD; + break; + } + distcode = (code const FAR *)(state->next); + distbits = 6; + ret = inflate_table9(DISTS, state->lens + state->nlen, + state->ndist, &(state->next), &(distbits), + state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + mode = LEN; + + case LEN: + /* get a literal, length, or end-of-block code */ + for (;;) { + this = lencode[BITS(lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.op && (this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + 
DROPBITS(last.bits); + } + DROPBITS(this.bits); + length = (unsigned)this.val; + + /* process literal */ + if (this.op == 0) { + Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", this.val)); + ROOM(); + *put++ = (unsigned char)(length); + left--; + mode = LEN; + break; + } + + /* process end of block */ + if (this.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + mode = TYPE; + break; + } + + /* invalid code */ + if (this.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + mode = BAD; + break; + } + + /* length code -- get extra bits, if any */ + extra = (unsigned)(this.op) & 31; + if (extra != 0) { + NEEDBITS(extra); + length += BITS(extra); + DROPBITS(extra); + } + Tracevv((stderr, "inflate: length %lu\n", length)); + + /* get distance code */ + for (;;) { + this = distcode[BITS(distbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if ((this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(this.bits); + if (this.op & 64) { + strm->msg = (char *)"invalid distance code"; + mode = BAD; + break; + } + offset = (unsigned)this.val; + + /* get distance extra bits, if any */ + extra = (unsigned)(this.op) & 15; + if (extra != 0) { + NEEDBITS(extra); + offset += BITS(extra); + DROPBITS(extra); + } + if (offset > WSIZE - (wrap ? 
0: left)) { + strm->msg = (char *)"invalid distance too far back"; + mode = BAD; + break; + } + Tracevv((stderr, "inflate: distance %lu\n", offset)); + + /* copy match from window to output */ + do { + ROOM(); + copy = WSIZE - offset; + if (copy < left) { + from = put + copy; + copy = left - copy; + } + else { + from = put - offset; + copy = left; + } + if (copy > length) copy = length; + length -= copy; + left -= copy; + do { + *put++ = *from++; + } while (--copy); + } while (length != 0); + break; + + case DONE: + /* inflate stream terminated properly -- write leftover output */ + ret = Z_STREAM_END; + if (left < WSIZE) { + if (out(out_desc, window, (unsigned)(WSIZE - left))) + ret = Z_BUF_ERROR; + } + goto inf_leave; + + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + + default: /* can't happen, but makes compilers happy */ + ret = Z_STREAM_ERROR; + goto inf_leave; + } + + /* Return unused input */ + inf_leave: + strm->next_in = next; + strm->avail_in = have; + return ret; +} + +int ZEXPORT inflateBack9End(strm) +z_stream FAR *strm; +{ + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} Added: external/zlib/contrib/infback9/infback9.h ============================================================================== --- (empty file) +++ external/zlib/contrib/infback9/infback9.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,37 @@ +/* infback9.h -- header for using inflateBack9 functions + * Copyright (C) 2003 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * This header file and associated patches provide a decoder for PKWare's + * undocumented deflate64 compression method (method 9). Use with infback9.c, + * inftree9.h, inftree9.c, and inffix9.h. These patches are not supported. + * This should be compiled with zlib, since it uses zutil.h and zutil.o. 
+ * This code has not yet been tested on 16-bit architectures. See the + * comments in zlib.h for inflateBack() usage. These functions are used + * identically, except that there is no windowBits parameter, and a 64K + * window must be provided. Also if int's are 16 bits, then a zero for + * the third parameter of the "out" function actually means 65536UL. + * zlib.h must be included before this header file. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +ZEXTERN int ZEXPORT inflateBack9 OF((z_stream FAR *strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +ZEXTERN int ZEXPORT inflateBack9End OF((z_stream FAR *strm)); +ZEXTERN int ZEXPORT inflateBack9Init_ OF((z_stream FAR *strm, + unsigned char FAR *window, + const char *version, + int stream_size)); +#define inflateBack9Init(strm, window) \ + inflateBack9Init_((strm), (window), \ + ZLIB_VERSION, sizeof(z_stream)) + +#ifdef __cplusplus +} +#endif Added: external/zlib/contrib/infback9/inffix9.h ============================================================================== --- (empty file) +++ external/zlib/contrib/infback9/inffix9.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,107 @@ + /* inffix9.h -- table for decoding deflate64 fixed codes + * Generated automatically by makefixed9(). + */ + + /* WARNING: this file should *not* be used by applications. + It is part of the implementation of this library and is + subject to change. Applications should only use zlib.h. 
+ */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{132,8,115},{130,7,31},{0,8,112}, + {0,8,48},{0,9,192},{128,7,10},{0,8,96},{0,8,32},{0,9,160}, + {0,8,0},{0,8,128},{0,8,64},{0,9,224},{128,7,6},{0,8,88}, + {0,8,24},{0,9,144},{131,7,59},{0,8,120},{0,8,56},{0,9,208}, + {129,7,17},{0,8,104},{0,8,40},{0,9,176},{0,8,8},{0,8,136}, + {0,8,72},{0,9,240},{128,7,4},{0,8,84},{0,8,20},{133,8,227}, + {131,7,43},{0,8,116},{0,8,52},{0,9,200},{129,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232}, + {128,7,8},{0,8,92},{0,8,28},{0,9,152},{132,7,83},{0,8,124}, + {0,8,60},{0,9,216},{130,7,23},{0,8,108},{0,8,44},{0,9,184}, + {0,8,12},{0,8,140},{0,8,76},{0,9,248},{128,7,3},{0,8,82}, + {0,8,18},{133,8,163},{131,7,35},{0,8,114},{0,8,50},{0,9,196}, + {129,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},{0,8,130}, + {0,8,66},{0,9,228},{128,7,7},{0,8,90},{0,8,26},{0,9,148}, + {132,7,67},{0,8,122},{0,8,58},{0,9,212},{130,7,19},{0,8,106}, + {0,8,42},{0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244}, + {128,7,5},{0,8,86},{0,8,22},{65,8,0},{131,7,51},{0,8,118}, + {0,8,54},{0,9,204},{129,7,15},{0,8,102},{0,8,38},{0,9,172}, + {0,8,6},{0,8,134},{0,8,70},{0,9,236},{128,7,9},{0,8,94}, + {0,8,30},{0,9,156},{132,7,99},{0,8,126},{0,8,62},{0,9,220}, + {130,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{133,8,131}, + {130,7,31},{0,8,113},{0,8,49},{0,9,194},{128,7,10},{0,8,97}, + {0,8,33},{0,9,162},{0,8,1},{0,8,129},{0,8,65},{0,9,226}, + {128,7,6},{0,8,89},{0,8,25},{0,9,146},{131,7,59},{0,8,121}, + {0,8,57},{0,9,210},{129,7,17},{0,8,105},{0,8,41},{0,9,178}, + {0,8,9},{0,8,137},{0,8,73},{0,9,242},{128,7,4},{0,8,85}, + {0,8,21},{144,8,3},{131,7,43},{0,8,117},{0,8,53},{0,9,202}, + {129,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133}, + {0,8,69},{0,9,234},{128,7,8},{0,8,93},{0,8,29},{0,9,154}, + {132,7,83},{0,8,125},{0,8,61},{0,9,218},{130,7,23},{0,8,109}, + 
{0,8,45},{0,9,186},{0,8,13},{0,8,141},{0,8,77},{0,9,250}, + {128,7,3},{0,8,83},{0,8,19},{133,8,195},{131,7,35},{0,8,115}, + {0,8,51},{0,9,198},{129,7,11},{0,8,99},{0,8,35},{0,9,166}, + {0,8,3},{0,8,131},{0,8,67},{0,9,230},{128,7,7},{0,8,91}, + {0,8,27},{0,9,150},{132,7,67},{0,8,123},{0,8,59},{0,9,214}, + {130,7,19},{0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139}, + {0,8,75},{0,9,246},{128,7,5},{0,8,87},{0,8,23},{77,8,0}, + {131,7,51},{0,8,119},{0,8,55},{0,9,206},{129,7,15},{0,8,103}, + {0,8,39},{0,9,174},{0,8,7},{0,8,135},{0,8,71},{0,9,238}, + {128,7,9},{0,8,95},{0,8,31},{0,9,158},{132,7,99},{0,8,127}, + {0,8,63},{0,9,222},{130,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80}, + {0,8,16},{132,8,115},{130,7,31},{0,8,112},{0,8,48},{0,9,193}, + {128,7,10},{0,8,96},{0,8,32},{0,9,161},{0,8,0},{0,8,128}, + {0,8,64},{0,9,225},{128,7,6},{0,8,88},{0,8,24},{0,9,145}, + {131,7,59},{0,8,120},{0,8,56},{0,9,209},{129,7,17},{0,8,104}, + {0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},{0,9,241}, + {128,7,4},{0,8,84},{0,8,20},{133,8,227},{131,7,43},{0,8,116}, + {0,8,52},{0,9,201},{129,7,13},{0,8,100},{0,8,36},{0,9,169}, + {0,8,4},{0,8,132},{0,8,68},{0,9,233},{128,7,8},{0,8,92}, + {0,8,28},{0,9,153},{132,7,83},{0,8,124},{0,8,60},{0,9,217}, + {130,7,23},{0,8,108},{0,8,44},{0,9,185},{0,8,12},{0,8,140}, + {0,8,76},{0,9,249},{128,7,3},{0,8,82},{0,8,18},{133,8,163}, + {131,7,35},{0,8,114},{0,8,50},{0,9,197},{129,7,11},{0,8,98}, + {0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {128,7,7},{0,8,90},{0,8,26},{0,9,149},{132,7,67},{0,8,122}, + {0,8,58},{0,9,213},{130,7,19},{0,8,106},{0,8,42},{0,9,181}, + {0,8,10},{0,8,138},{0,8,74},{0,9,245},{128,7,5},{0,8,86}, + {0,8,22},{65,8,0},{131,7,51},{0,8,118},{0,8,54},{0,9,205}, + {129,7,15},{0,8,102},{0,8,38},{0,9,173},{0,8,6},{0,8,134}, + {0,8,70},{0,9,237},{128,7,9},{0,8,94},{0,8,30},{0,9,157}, + {132,7,99},{0,8,126},{0,8,62},{0,9,221},{130,7,27},{0,8,110}, + 
{0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253}, + {96,7,0},{0,8,81},{0,8,17},{133,8,131},{130,7,31},{0,8,113}, + {0,8,49},{0,9,195},{128,7,10},{0,8,97},{0,8,33},{0,9,163}, + {0,8,1},{0,8,129},{0,8,65},{0,9,227},{128,7,6},{0,8,89}, + {0,8,25},{0,9,147},{131,7,59},{0,8,121},{0,8,57},{0,9,211}, + {129,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},{0,8,137}, + {0,8,73},{0,9,243},{128,7,4},{0,8,85},{0,8,21},{144,8,3}, + {131,7,43},{0,8,117},{0,8,53},{0,9,203},{129,7,13},{0,8,101}, + {0,8,37},{0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235}, + {128,7,8},{0,8,93},{0,8,29},{0,9,155},{132,7,83},{0,8,125}, + {0,8,61},{0,9,219},{130,7,23},{0,8,109},{0,8,45},{0,9,187}, + {0,8,13},{0,8,141},{0,8,77},{0,9,251},{128,7,3},{0,8,83}, + {0,8,19},{133,8,195},{131,7,35},{0,8,115},{0,8,51},{0,9,199}, + {129,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{128,7,7},{0,8,91},{0,8,27},{0,9,151}, + {132,7,67},{0,8,123},{0,8,59},{0,9,215},{130,7,19},{0,8,107}, + {0,8,43},{0,9,183},{0,8,11},{0,8,139},{0,8,75},{0,9,247}, + {128,7,5},{0,8,87},{0,8,23},{77,8,0},{131,7,51},{0,8,119}, + {0,8,55},{0,9,207},{129,7,15},{0,8,103},{0,8,39},{0,9,175}, + {0,8,7},{0,8,135},{0,8,71},{0,9,239},{128,7,9},{0,8,95}, + {0,8,31},{0,9,159},{132,7,99},{0,8,127},{0,8,63},{0,9,223}, + {130,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143}, + {0,8,79},{0,9,255} + }; + + static const code distfix[32] = { + {128,5,1},{135,5,257},{131,5,17},{139,5,4097},{129,5,5}, + {137,5,1025},{133,5,65},{141,5,16385},{128,5,3},{136,5,513}, + {132,5,33},{140,5,8193},{130,5,9},{138,5,2049},{134,5,129}, + {142,5,32769},{128,5,2},{135,5,385},{131,5,25},{139,5,6145}, + {129,5,7},{137,5,1537},{133,5,97},{141,5,24577},{128,5,4}, + {136,5,769},{132,5,49},{140,5,12289},{130,5,13},{138,5,3073}, + {134,5,193},{142,5,49153} + }; Added: external/zlib/contrib/infback9/inflate9.h ============================================================================== --- (empty file) +++ 
external/zlib/contrib/infback9/inflate9.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,47 @@ +/* inflate9.h -- internal inflate state definition + * Copyright (C) 1995-2003 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Possible inflate modes between inflate() calls */ +typedef enum { + TYPE, /* i: waiting for type bits, including last-flag bit */ + STORED, /* i: waiting for stored size (length and complement) */ + TABLE, /* i: waiting for dynamic block table lengths */ + LEN, /* i: waiting for length/lit code */ + DONE, /* finished check, done -- remain here until reset */ + BAD /* got a data error -- remain here until reset */ +} inflate_mode; + +/* + State transitions between above modes - + + (most modes can go to the BAD mode -- not shown for clarity) + + Read deflate blocks: + TYPE -> STORED or TABLE or LEN or DONE + STORED -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN + Read deflate codes: + LEN -> LEN or TYPE + */ + +/* state maintained between inflate() calls. Approximately 7K bytes. 
*/ +struct inflate_state { + /* sliding window */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ +}; Added: external/zlib/contrib/infback9/inftree9.c ============================================================================== --- (empty file) +++ external/zlib/contrib/infback9/inftree9.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,323 @@ +/* inftree9.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftree9.h" + +#define MAXBITS 15 + +const char inflate9_copyright[] = + " inflate9 1.2.3 Copyright 1995-2005 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. 
bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. + */ +int inflate_table9(type, lens, codes, table, bits, work) +codetype type; +unsigned short FAR *lens; +unsigned codes; +code FAR * FAR *table; +unsigned FAR *bits; +unsigned short FAR *work; +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code this; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + int end; /* use base and extra for symbol > end */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, + 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, + 131, 163, 195, 227, 3, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, + 130, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132, 132, + 133, 133, 133, 133, 144, 201, 196}; + static const unsigned short dbase[32] = { /* 
Distance codes 0..31 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, + 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, + 4097, 6145, 8193, 12289, 16385, 24577, 32769, 49153}; + static const unsigned short dext[32] = { /* Distance codes 0..31 extra */ + 128, 128, 128, 128, 129, 129, 130, 130, 131, 131, 132, 132, + 133, 133, 134, 134, 135, 135, 136, 136, 137, 137, 138, 138, + 139, 139, 140, 140, 141, 141, 142, 142}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. + + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. 
finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. + */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) return -1; /* no codes! */ + for (min = 1; min <= MAXBITS; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. 
This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked when a LENS table is being made + against the space in *table, ENOUGH, minus the maximum space needed by + the worst case distance code, MAXD. This should never happen, but the + sufficiency of ENOUGH has not been proven exhaustively, hence the check. + This assumes that when type == LENS, bits == 9. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. 
+ */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + end = 19; + break; + case LENS: + base = lbase; + base -= 257; + extra = lext; + extra -= 257; + end = 256; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + end = -1; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if (type == LENS && used >= ENOUGH - MAXD) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + this.bits = (unsigned char)(len - drop); + if ((int)(work[sym]) < end) { + this.op = (unsigned char)0; + this.val = work[sym]; + } + else if ((int)(work[sym]) > end) { + this.op = (unsigned char)(extra[work[sym]]); + this.val = base[work[sym]]; + } + else { + this.op = (unsigned char)(32 + 64); /* end of block */ + this.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + do { + fill -= incr; + next[(huff >> drop) + fill] = this; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to sub-tables */ + if (drop == 0) + drop = root; + 
+ /* increment past last table */ + next += 1U << curr; + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if (type == LENS && used >= ENOUGH - MAXD) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* + Fill in rest of table for incomplete codes. This loop is similar to the + loop above in incrementing huff for table indices. It is assumed that + len is equal to curr + drop, so there is no loop needed to increment + through high index bits. When the current sub-table is filled, the loop + drops back to the root table to fill in any remaining entries there. + */ + this.op = (unsigned char)64; /* invalid code marker */ + this.bits = (unsigned char)(len - drop); + this.val = (unsigned short)0; + while (huff != 0) { + /* when done with sub-table, drop back to root table */ + if (drop != 0 && (huff & mask) != low) { + drop = 0; + len = root; + next = *table; + curr = root; + this.bits = (unsigned char)len; + } + + /* put invalid code marker in table */ + next[huff >> drop] = this; + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} Added: external/zlib/contrib/infback9/inftree9.h ============================================================================== --- (empty file) +++ external/zlib/contrib/infback9/inftree9.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,55 @@ +/* inftree9.h -- header to use inftree9.c + * Copyright (C) 1995-2003 Mark Adler + * For conditions of 
distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low five bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table9(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 100eeeee - length or distance, eeeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of dynamic tree. The maximum found in a long but non- + exhaustive search was 1444 code structures (852 for length/literals + and 592 for distances, the latter actually the result of an + exhaustive search). The true maximum is not known, but the value + below is more than safe. 
*/ +#define ENOUGH 2048 +#define MAXD 592 + +/* Type of code to build for inftable() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +extern int inflate_table9 OF((codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work)); Added: external/zlib/contrib/inflate86/inffas86.c ============================================================================== --- (empty file) +++ external/zlib/contrib/inflate86/inffas86.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,1157 @@ +/* inffas86.c is a hand tuned assembler version of + * + * inffast.c -- fast decoding + * Copyright (C) 1995-2003 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Copyright (C) 2003 Chris Anderson + * Please use the copyright conditions above. + * + * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also + * slightly quicker on x86 systems because, instead of using rep movsb to copy + * data, it uses rep movsw, which moves data in 2-byte chunks instead of single + * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates + * from http://fedora.linux.duke.edu/fc1_x86_64 + * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with + * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version, + * when decompressing mozilla-source-1.3.tar.gz. + * + * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from + * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at + * the moment. I have successfully compiled and tested this code with gcc2.96, + * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S + * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX + * enabled. I will attempt to merge the MMX code into this version. 
Newer + * versions of this and inffast.S can be found at + * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/ + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +/* Mark Adler's comments from inffast.c: */ + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. 
+ */ +void inflate_fast(strm, start) +z_streamp strm; +unsigned start; /* inflate()'s starting value for strm->avail_out */ +{ + struct inflate_state FAR *state; + struct inffast_ar { +/* 64 32 x86 x86_64 */ +/* ar offset register */ +/* 0 0 */ void *esp; /* esp save */ +/* 8 4 */ void *ebp; /* ebp save */ +/* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */ +/* 24 12 */ unsigned char FAR *last; /* r9 while in < last */ +/* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */ +/* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */ +/* 48 24 */ unsigned char FAR *end; /* r10 while out < end */ +/* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */ +/* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */ +/* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */ +/* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */ +/* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */ +/* 92 48 */ unsigned wsize; /* window size */ +/* 96 52 */ unsigned write; /* window write index */ +/*100 56 */ unsigned lmask; /* r12 mask for lcode */ +/*104 60 */ unsigned dmask; /* r13 mask for dcode */ +/*108 64 */ unsigned len; /* r14 match length */ +/*112 68 */ unsigned dist; /* r15 match distance */ +/*116 72 */ unsigned status; /* set when state chng*/ + } ar; + +#if defined( __GNUC__ ) && defined( __amd64__ ) && ! 
defined( __i386 ) +#define PAD_AVAIL_IN 6 +#define PAD_AVAIL_OUT 258 +#else +#define PAD_AVAIL_IN 5 +#define PAD_AVAIL_OUT 257 +#endif + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + ar.in = strm->next_in; + ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN); + ar.out = strm->next_out; + ar.beg = ar.out - (start - strm->avail_out); + ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT); + ar.wsize = state->wsize; + ar.write = state->write; + ar.window = state->window; + ar.hold = state->hold; + ar.bits = state->bits; + ar.lcode = state->lencode; + ar.dcode = state->distcode; + ar.lmask = (1U << state->lenbits) - 1; + ar.dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + + /* align in on 1/2 hold size boundary */ + while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) { + ar.hold += (unsigned long)*ar.in++ << ar.bits; + ar.bits += 8; + } + +#if defined( __GNUC__ ) && defined( __amd64__ ) && ! 
defined( __i386 ) + __asm__ __volatile__ ( +" leaq %0, %%rax\n" +" movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */ +" movq %%rsp, (%%rax)\n" +" movq %%rax, %%rsp\n" /* make rsp point to &ar */ +" movq 16(%%rsp), %%rsi\n" /* rsi = in */ +" movq 32(%%rsp), %%rdi\n" /* rdi = out */ +" movq 24(%%rsp), %%r9\n" /* r9 = last */ +" movq 48(%%rsp), %%r10\n" /* r10 = end */ +" movq 64(%%rsp), %%rbp\n" /* rbp = lcode */ +" movq 72(%%rsp), %%r11\n" /* r11 = dcode */ +" movq 80(%%rsp), %%rdx\n" /* rdx = hold */ +" movl 88(%%rsp), %%ebx\n" /* ebx = bits */ +" movl 100(%%rsp), %%r12d\n" /* r12d = lmask */ +" movl 104(%%rsp), %%r13d\n" /* r13d = dmask */ + /* r14d = len */ + /* r15d = dist */ +" cld\n" +" cmpq %%rdi, %%r10\n" +" je .L_one_time\n" /* if only one decode left */ +" cmpq %%rsi, %%r9\n" +" je .L_one_time\n" +" jmp .L_do_loop\n" + +".L_one_time:\n" +" movq %%r12, %%r8\n" /* r8 = lmask */ +" cmpb $32, %%bl\n" +" ja .L_get_length_code_one_time\n" + +" lodsl\n" /* eax = *(uint *)in++ */ +" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ +" addb $32, %%bl\n" /* bits += 32 */ +" shlq %%cl, %%rax\n" +" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ +" jmp .L_get_length_code_one_time\n" + +".align 32,0x90\n" +".L_while_test:\n" +" cmpq %%rdi, %%r10\n" +" jbe .L_break_loop\n" +" cmpq %%rsi, %%r9\n" +" jbe .L_break_loop\n" + +".L_do_loop:\n" +" movq %%r12, %%r8\n" /* r8 = lmask */ +" cmpb $32, %%bl\n" +" ja .L_get_length_code\n" /* if (32 < bits) */ + +" lodsl\n" /* eax = *(uint *)in++ */ +" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ +" addb $32, %%bl\n" /* bits += 32 */ +" shlq %%cl, %%rax\n" +" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ + +".L_get_length_code:\n" +" andq %%rdx, %%r8\n" /* r8 &= hold */ +" movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */ + +" movb %%ah, %%cl\n" /* cl = this.bits */ +" subb %%ah, %%bl\n" /* bits -= this.bits */ +" shrq %%cl, %%rdx\n" /* hold >>= this.bits */ + +" testb %%al, 
%%al\n" +" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ + +" movq %%r12, %%r8\n" /* r8 = lmask */ +" shrl $16, %%eax\n" /* output this.val char */ +" stosb\n" + +".L_get_length_code_one_time:\n" +" andq %%rdx, %%r8\n" /* r8 &= hold */ +" movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */ + +".L_dolen:\n" +" movb %%ah, %%cl\n" /* cl = this.bits */ +" subb %%ah, %%bl\n" /* bits -= this.bits */ +" shrq %%cl, %%rdx\n" /* hold >>= this.bits */ + +" testb %%al, %%al\n" +" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ + +" shrl $16, %%eax\n" /* output this.val char */ +" stosb\n" +" jmp .L_while_test\n" + +".align 32,0x90\n" +".L_test_for_length_base:\n" +" movl %%eax, %%r14d\n" /* len = this */ +" shrl $16, %%r14d\n" /* len = this.val */ +" movb %%al, %%cl\n" + +" testb $16, %%al\n" +" jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */ +" andb $15, %%cl\n" /* op &= 15 */ +" jz .L_decode_distance\n" /* if (!op) */ + +".L_add_bits_to_len:\n" +" subb %%cl, %%bl\n" +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" +" andl %%edx, %%eax\n" /* eax &= hold */ +" shrq %%cl, %%rdx\n" +" addl %%eax, %%r14d\n" /* len += hold & mask[op] */ + +".L_decode_distance:\n" +" movq %%r13, %%r8\n" /* r8 = dmask */ +" cmpb $32, %%bl\n" +" ja .L_get_distance_code\n" /* if (32 < bits) */ + +" lodsl\n" /* eax = *(uint *)in++ */ +" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ +" addb $32, %%bl\n" /* bits += 32 */ +" shlq %%cl, %%rax\n" +" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ + +".L_get_distance_code:\n" +" andq %%rdx, %%r8\n" /* r8 &= hold */ +" movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */ + +".L_dodist:\n" +" movl %%eax, %%r15d\n" /* dist = this */ +" shrl $16, %%r15d\n" /* dist = this.val */ +" movb %%ah, %%cl\n" +" subb %%ah, %%bl\n" /* bits -= this.bits */ +" shrq %%cl, %%rdx\n" /* hold >>= this.bits */ +" movb %%al, %%cl\n" /* cl = this.op */ + +" testb $16, 
%%al\n" /* if ((op & 16) == 0) */ +" jz .L_test_for_second_level_dist\n" +" andb $15, %%cl\n" /* op &= 15 */ +" jz .L_check_dist_one\n" + +".L_add_bits_to_dist:\n" +" subb %%cl, %%bl\n" +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" /* (1 << op) - 1 */ +" andl %%edx, %%eax\n" /* eax &= hold */ +" shrq %%cl, %%rdx\n" +" addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */ + +".L_check_window:\n" +" movq %%rsi, %%r8\n" /* save in so from can use it's reg */ +" movq %%rdi, %%rax\n" +" subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */ + +" cmpl %%r15d, %%eax\n" +" jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */ + +" movl %%r14d, %%ecx\n" /* ecx = len */ +" movq %%rdi, %%rsi\n" +" subq %%r15, %%rsi\n" /* from = out - dist */ + +" sarl %%ecx\n" +" jnc .L_copy_two\n" /* if len % 2 == 0 */ + +" rep movsw\n" +" movb (%%rsi), %%al\n" +" movb %%al, (%%rdi)\n" +" incq %%rdi\n" + +" movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */ +" jmp .L_while_test\n" + +".L_copy_two:\n" +" rep movsw\n" +" movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */ +" jmp .L_while_test\n" + +".align 32,0x90\n" +".L_check_dist_one:\n" +" cmpl $1, %%r15d\n" /* if dist 1, is a memset */ +" jne .L_check_window\n" +" cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */ +" je .L_check_window\n" + +" movl %%r14d, %%ecx\n" /* ecx = len */ +" movb -1(%%rdi), %%al\n" +" movb %%al, %%ah\n" + +" sarl %%ecx\n" +" jnc .L_set_two\n" +" movb %%al, (%%rdi)\n" +" incq %%rdi\n" + +".L_set_two:\n" +" rep stosw\n" +" jmp .L_while_test\n" + +".align 32,0x90\n" +".L_test_for_second_level_length:\n" +" testb $64, %%al\n" +" jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */ + +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" +" andl %%edx, %%eax\n" /* eax &= hold */ +" addl %%r14d, %%eax\n" /* eax += len */ +" movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/ +" jmp .L_dolen\n" + +".align 32,0x90\n" 
+".L_test_for_second_level_dist:\n" +" testb $64, %%al\n" +" jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */ + +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" +" andl %%edx, %%eax\n" /* eax &= hold */ +" addl %%r15d, %%eax\n" /* eax += dist */ +" movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/ +" jmp .L_dodist\n" + +".align 32,0x90\n" +".L_clip_window:\n" +" movl %%eax, %%ecx\n" /* ecx = nbytes */ +" movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */ +" negl %%ecx\n" /* nbytes = -nbytes */ + +" cmpl %%r15d, %%eax\n" +" jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */ + +" addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */ +" cmpl $0, 96(%%rsp)\n" +" jne .L_wrap_around_window\n" /* if (write != 0) */ + +" movq 56(%%rsp), %%rsi\n" /* from = window */ +" subl %%ecx, %%eax\n" /* eax -= nbytes */ +" addq %%rax, %%rsi\n" /* from += wsize - nbytes */ + +" movl %%r14d, %%eax\n" /* eax = len */ +" cmpl %%ecx, %%r14d\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* eax -= nbytes */ +" rep movsb\n" +" movq %%rdi, %%rsi\n" +" subq %%r15, %%rsi\n" /* from = &out[ -dist ] */ +" jmp .L_do_copy\n" + +".align 32,0x90\n" +".L_wrap_around_window:\n" +" movl 96(%%rsp), %%eax\n" /* eax = write */ +" cmpl %%eax, %%ecx\n" +" jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */ + +" movl 92(%%rsp), %%esi\n" /* from = wsize */ +" addq 56(%%rsp), %%rsi\n" /* from += window */ +" addq %%rax, %%rsi\n" /* from += write */ +" subq %%rcx, %%rsi\n" /* from -= nbytes */ +" subl %%eax, %%ecx\n" /* nbytes -= write */ + +" movl %%r14d, %%eax\n" /* eax = len */ +" cmpl %%ecx, %%eax\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* len -= nbytes */ +" rep movsb\n" +" movq 56(%%rsp), %%rsi\n" /* from = window */ +" movl 96(%%rsp), %%ecx\n" /* nbytes = write */ +" cmpl %%ecx, %%eax\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* 
len -= nbytes */ +" rep movsb\n" +" movq %%rdi, %%rsi\n" +" subq %%r15, %%rsi\n" /* from = out - dist */ +" jmp .L_do_copy\n" + +".align 32,0x90\n" +".L_contiguous_in_window:\n" +" movq 56(%%rsp), %%rsi\n" /* rsi = window */ +" addq %%rax, %%rsi\n" +" subq %%rcx, %%rsi\n" /* from += write - nbytes */ + +" movl %%r14d, %%eax\n" /* eax = len */ +" cmpl %%ecx, %%eax\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* len -= nbytes */ +" rep movsb\n" +" movq %%rdi, %%rsi\n" +" subq %%r15, %%rsi\n" /* from = out - dist */ +" jmp .L_do_copy\n" /* if (nbytes >= len) */ + +".align 32,0x90\n" +".L_do_copy:\n" +" movl %%eax, %%ecx\n" /* ecx = len */ +" rep movsb\n" + +" movq %%r8, %%rsi\n" /* move in back to %esi, toss from */ +" jmp .L_while_test\n" + +".L_test_for_end_of_block:\n" +" testb $32, %%al\n" +" jz .L_invalid_literal_length_code\n" +" movl $1, 116(%%rsp)\n" +" jmp .L_break_loop_with_status\n" + +".L_invalid_literal_length_code:\n" +" movl $2, 116(%%rsp)\n" +" jmp .L_break_loop_with_status\n" + +".L_invalid_distance_code:\n" +" movl $3, 116(%%rsp)\n" +" jmp .L_break_loop_with_status\n" + +".L_invalid_distance_too_far:\n" +" movl $4, 116(%%rsp)\n" +" jmp .L_break_loop_with_status\n" + +".L_break_loop:\n" +" movl $0, 116(%%rsp)\n" + +".L_break_loop_with_status:\n" +/* put in, out, bits, and hold back into ar and pop esp */ +" movq %%rsi, 16(%%rsp)\n" /* in */ +" movq %%rdi, 32(%%rsp)\n" /* out */ +" movl %%ebx, 88(%%rsp)\n" /* bits */ +" movq %%rdx, 80(%%rsp)\n" /* hold */ +" movq (%%rsp), %%rax\n" /* restore rbp and rsp */ +" movq 8(%%rsp), %%rbp\n" +" movq %%rax, %%rsp\n" + : + : "m" (ar) + : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" + ); +#elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 ) + __asm__ __volatile__ ( +" leal %0, %%eax\n" +" movl %%esp, (%%eax)\n" /* save esp, ebp */ +" movl %%ebp, 4(%%eax)\n" +" movl %%eax, %%esp\n" +" movl 
8(%%esp), %%esi\n" /* esi = in */ +" movl 16(%%esp), %%edi\n" /* edi = out */ +" movl 40(%%esp), %%edx\n" /* edx = hold */ +" movl 44(%%esp), %%ebx\n" /* ebx = bits */ +" movl 32(%%esp), %%ebp\n" /* ebp = lcode */ + +" cld\n" +" jmp .L_do_loop\n" + +".align 32,0x90\n" +".L_while_test:\n" +" cmpl %%edi, 24(%%esp)\n" /* out < end */ +" jbe .L_break_loop\n" +" cmpl %%esi, 12(%%esp)\n" /* in < last */ +" jbe .L_break_loop\n" + +".L_do_loop:\n" +" cmpb $15, %%bl\n" +" ja .L_get_length_code\n" /* if (15 < bits) */ + +" xorl %%eax, %%eax\n" +" lodsw\n" /* al = *(ushort *)in++ */ +" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ +" addb $16, %%bl\n" /* bits += 16 */ +" shll %%cl, %%eax\n" +" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ + +".L_get_length_code:\n" +" movl 56(%%esp), %%eax\n" /* eax = lmask */ +" andl %%edx, %%eax\n" /* eax &= hold */ +" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */ + +".L_dolen:\n" +" movb %%ah, %%cl\n" /* cl = this.bits */ +" subb %%ah, %%bl\n" /* bits -= this.bits */ +" shrl %%cl, %%edx\n" /* hold >>= this.bits */ + +" testb %%al, %%al\n" +" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ + +" shrl $16, %%eax\n" /* output this.val char */ +" stosb\n" +" jmp .L_while_test\n" + +".align 32,0x90\n" +".L_test_for_length_base:\n" +" movl %%eax, %%ecx\n" /* len = this */ +" shrl $16, %%ecx\n" /* len = this.val */ +" movl %%ecx, 64(%%esp)\n" /* save len */ +" movb %%al, %%cl\n" + +" testb $16, %%al\n" +" jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */ +" andb $15, %%cl\n" /* op &= 15 */ +" jz .L_decode_distance\n" /* if (!op) */ +" cmpb %%cl, %%bl\n" +" jae .L_add_bits_to_len\n" /* if (op <= bits) */ + +" movb %%cl, %%ch\n" /* stash op in ch, freeing cl */ +" xorl %%eax, %%eax\n" +" lodsw\n" /* al = *(ushort *)in++ */ +" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ +" addb $16, %%bl\n" /* bits += 16 */ +" shll %%cl, %%eax\n" +" orl %%eax, %%edx\n" /* hold |= 
*((ushort *)in)++ << bits */ +" movb %%ch, %%cl\n" /* move op back to ecx */ + +".L_add_bits_to_len:\n" +" subb %%cl, %%bl\n" +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" +" andl %%edx, %%eax\n" /* eax &= hold */ +" shrl %%cl, %%edx\n" +" addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */ + +".L_decode_distance:\n" +" cmpb $15, %%bl\n" +" ja .L_get_distance_code\n" /* if (15 < bits) */ + +" xorl %%eax, %%eax\n" +" lodsw\n" /* al = *(ushort *)in++ */ +" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ +" addb $16, %%bl\n" /* bits += 16 */ +" shll %%cl, %%eax\n" +" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ + +".L_get_distance_code:\n" +" movl 60(%%esp), %%eax\n" /* eax = dmask */ +" movl 36(%%esp), %%ecx\n" /* ecx = dcode */ +" andl %%edx, %%eax\n" /* eax &= hold */ +" movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */ + +".L_dodist:\n" +" movl %%eax, %%ebp\n" /* dist = this */ +" shrl $16, %%ebp\n" /* dist = this.val */ +" movb %%ah, %%cl\n" +" subb %%ah, %%bl\n" /* bits -= this.bits */ +" shrl %%cl, %%edx\n" /* hold >>= this.bits */ +" movb %%al, %%cl\n" /* cl = this.op */ + +" testb $16, %%al\n" /* if ((op & 16) == 0) */ +" jz .L_test_for_second_level_dist\n" +" andb $15, %%cl\n" /* op &= 15 */ +" jz .L_check_dist_one\n" +" cmpb %%cl, %%bl\n" +" jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */ + +" movb %%cl, %%ch\n" /* stash op in ch, freeing cl */ +" xorl %%eax, %%eax\n" +" lodsw\n" /* al = *(ushort *)in++ */ +" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ +" addb $16, %%bl\n" /* bits += 16 */ +" shll %%cl, %%eax\n" +" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ +" movb %%ch, %%cl\n" /* move op back to ecx */ + +".L_add_bits_to_dist:\n" +" subb %%cl, %%bl\n" +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" /* (1 << op) - 1 */ +" andl %%edx, %%eax\n" /* eax &= hold */ +" shrl %%cl, %%edx\n" +" addl %%eax, %%ebp\n" /* dist 
+= hold & ((1 << op) - 1) */ + +".L_check_window:\n" +" movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */ +" movl %%edi, %%eax\n" +" subl 20(%%esp), %%eax\n" /* nbytes = out - beg */ + +" cmpl %%ebp, %%eax\n" +" jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */ + +" movl 64(%%esp), %%ecx\n" /* ecx = len */ +" movl %%edi, %%esi\n" +" subl %%ebp, %%esi\n" /* from = out - dist */ + +" sarl %%ecx\n" +" jnc .L_copy_two\n" /* if len % 2 == 0 */ + +" rep movsw\n" +" movb (%%esi), %%al\n" +" movb %%al, (%%edi)\n" +" incl %%edi\n" + +" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ +" movl 32(%%esp), %%ebp\n" /* ebp = lcode */ +" jmp .L_while_test\n" + +".L_copy_two:\n" +" rep movsw\n" +" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ +" movl 32(%%esp), %%ebp\n" /* ebp = lcode */ +" jmp .L_while_test\n" + +".align 32,0x90\n" +".L_check_dist_one:\n" +" cmpl $1, %%ebp\n" /* if dist 1, is a memset */ +" jne .L_check_window\n" +" cmpl %%edi, 20(%%esp)\n" +" je .L_check_window\n" /* out == beg, if outside window */ + +" movl 64(%%esp), %%ecx\n" /* ecx = len */ +" movb -1(%%edi), %%al\n" +" movb %%al, %%ah\n" + +" sarl %%ecx\n" +" jnc .L_set_two\n" +" movb %%al, (%%edi)\n" +" incl %%edi\n" + +".L_set_two:\n" +" rep stosw\n" +" movl 32(%%esp), %%ebp\n" /* ebp = lcode */ +" jmp .L_while_test\n" + +".align 32,0x90\n" +".L_test_for_second_level_length:\n" +" testb $64, %%al\n" +" jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */ + +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" +" andl %%edx, %%eax\n" /* eax &= hold */ +" addl 64(%%esp), %%eax\n" /* eax += len */ +" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/ +" jmp .L_dolen\n" + +".align 32,0x90\n" +".L_test_for_second_level_dist:\n" +" testb $64, %%al\n" +" jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */ + +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" +" andl %%edx, %%eax\n" /* 
eax &= hold */ +" addl %%ebp, %%eax\n" /* eax += dist */ +" movl 36(%%esp), %%ecx\n" /* ecx = dcode */ +" movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/ +" jmp .L_dodist\n" + +".align 32,0x90\n" +".L_clip_window:\n" +" movl %%eax, %%ecx\n" +" movl 48(%%esp), %%eax\n" /* eax = wsize */ +" negl %%ecx\n" /* nbytes = -nbytes */ +" movl 28(%%esp), %%esi\n" /* from = window */ + +" cmpl %%ebp, %%eax\n" +" jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */ + +" addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */ +" cmpl $0, 52(%%esp)\n" +" jne .L_wrap_around_window\n" /* if (write != 0) */ + +" subl %%ecx, %%eax\n" +" addl %%eax, %%esi\n" /* from += wsize - nbytes */ + +" movl 64(%%esp), %%eax\n" /* eax = len */ +" cmpl %%ecx, %%eax\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* len -= nbytes */ +" rep movsb\n" +" movl %%edi, %%esi\n" +" subl %%ebp, %%esi\n" /* from = out - dist */ +" jmp .L_do_copy\n" + +".align 32,0x90\n" +".L_wrap_around_window:\n" +" movl 52(%%esp), %%eax\n" /* eax = write */ +" cmpl %%eax, %%ecx\n" +" jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */ + +" addl 48(%%esp), %%esi\n" /* from += wsize */ +" addl %%eax, %%esi\n" /* from += write */ +" subl %%ecx, %%esi\n" /* from -= nbytes */ +" subl %%eax, %%ecx\n" /* nbytes -= write */ + +" movl 64(%%esp), %%eax\n" /* eax = len */ +" cmpl %%ecx, %%eax\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* len -= nbytes */ +" rep movsb\n" +" movl 28(%%esp), %%esi\n" /* from = window */ +" movl 52(%%esp), %%ecx\n" /* nbytes = write */ +" cmpl %%ecx, %%eax\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* len -= nbytes */ +" rep movsb\n" +" movl %%edi, %%esi\n" +" subl %%ebp, %%esi\n" /* from = out - dist */ +" jmp .L_do_copy\n" + +".align 32,0x90\n" +".L_contiguous_in_window:\n" +" addl %%eax, %%esi\n" +" subl %%ecx, %%esi\n" /* from += write - nbytes */ + +" movl 64(%%esp), %%eax\n" /* eax 
= len */ +" cmpl %%ecx, %%eax\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* len -= nbytes */ +" rep movsb\n" +" movl %%edi, %%esi\n" +" subl %%ebp, %%esi\n" /* from = out - dist */ +" jmp .L_do_copy\n" /* if (nbytes >= len) */ + +".align 32,0x90\n" +".L_do_copy:\n" +" movl %%eax, %%ecx\n" +" rep movsb\n" + +" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ +" movl 32(%%esp), %%ebp\n" /* ebp = lcode */ +" jmp .L_while_test\n" + +".L_test_for_end_of_block:\n" +" testb $32, %%al\n" +" jz .L_invalid_literal_length_code\n" +" movl $1, 72(%%esp)\n" +" jmp .L_break_loop_with_status\n" + +".L_invalid_literal_length_code:\n" +" movl $2, 72(%%esp)\n" +" jmp .L_break_loop_with_status\n" + +".L_invalid_distance_code:\n" +" movl $3, 72(%%esp)\n" +" jmp .L_break_loop_with_status\n" + +".L_invalid_distance_too_far:\n" +" movl 8(%%esp), %%esi\n" +" movl $4, 72(%%esp)\n" +" jmp .L_break_loop_with_status\n" + +".L_break_loop:\n" +" movl $0, 72(%%esp)\n" + +".L_break_loop_with_status:\n" +/* put in, out, bits, and hold back into ar and pop esp */ +" movl %%esi, 8(%%esp)\n" /* save in */ +" movl %%edi, 16(%%esp)\n" /* save out */ +" movl %%ebx, 44(%%esp)\n" /* save bits */ +" movl %%edx, 40(%%esp)\n" /* save hold */ +" movl 4(%%esp), %%ebp\n" /* restore esp, ebp */ +" movl (%%esp), %%esp\n" + : + : "m" (ar) + : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" + ); +#elif defined( _MSC_VER ) && ! 
defined( _M_AMD64 ) + __asm { + lea eax, ar + mov [eax], esp /* save esp, ebp */ + mov [eax+4], ebp + mov esp, eax + mov esi, [esp+8] /* esi = in */ + mov edi, [esp+16] /* edi = out */ + mov edx, [esp+40] /* edx = hold */ + mov ebx, [esp+44] /* ebx = bits */ + mov ebp, [esp+32] /* ebp = lcode */ + + cld + jmp L_do_loop + +ALIGN 4 +L_while_test: + cmp [esp+24], edi + jbe L_break_loop + cmp [esp+12], esi + jbe L_break_loop + +L_do_loop: + cmp bl, 15 + ja L_get_length_code /* if (15 < bits) */ + + xor eax, eax + lodsw /* al = *(ushort *)in++ */ + mov cl, bl /* cl = bits, needs it for shifting */ + add bl, 16 /* bits += 16 */ + shl eax, cl + or edx, eax /* hold |= *((ushort *)in)++ << bits */ + +L_get_length_code: + mov eax, [esp+56] /* eax = lmask */ + and eax, edx /* eax &= hold */ + mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */ + +L_dolen: + mov cl, ah /* cl = this.bits */ + sub bl, ah /* bits -= this.bits */ + shr edx, cl /* hold >>= this.bits */ + + test al, al + jnz L_test_for_length_base /* if (op != 0) 45.7% */ + + shr eax, 16 /* output this.val char */ + stosb + jmp L_while_test + +ALIGN 4 +L_test_for_length_base: + mov ecx, eax /* len = this */ + shr ecx, 16 /* len = this.val */ + mov [esp+64], ecx /* save len */ + mov cl, al + + test al, 16 + jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */ + and cl, 15 /* op &= 15 */ + jz L_decode_distance /* if (!op) */ + cmp bl, cl + jae L_add_bits_to_len /* if (op <= bits) */ + + mov ch, cl /* stash op in ch, freeing cl */ + xor eax, eax + lodsw /* al = *(ushort *)in++ */ + mov cl, bl /* cl = bits, needs it for shifting */ + add bl, 16 /* bits += 16 */ + shl eax, cl + or edx, eax /* hold |= *((ushort *)in)++ << bits */ + mov cl, ch /* move op back to ecx */ + +L_add_bits_to_len: + sub bl, cl + xor eax, eax + inc eax + shl eax, cl + dec eax + and eax, edx /* eax &= hold */ + shr edx, cl + add [esp+64], eax /* len += hold & mask[op] */ + +L_decode_distance: + cmp bl, 15 + ja L_get_distance_code /* if 
(15 < bits) */ + + xor eax, eax + lodsw /* al = *(ushort *)in++ */ + mov cl, bl /* cl = bits, needs it for shifting */ + add bl, 16 /* bits += 16 */ + shl eax, cl + or edx, eax /* hold |= *((ushort *)in)++ << bits */ + +L_get_distance_code: + mov eax, [esp+60] /* eax = dmask */ + mov ecx, [esp+36] /* ecx = dcode */ + and eax, edx /* eax &= hold */ + mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */ + +L_dodist: + mov ebp, eax /* dist = this */ + shr ebp, 16 /* dist = this.val */ + mov cl, ah + sub bl, ah /* bits -= this.bits */ + shr edx, cl /* hold >>= this.bits */ + mov cl, al /* cl = this.op */ + + test al, 16 /* if ((op & 16) == 0) */ + jz L_test_for_second_level_dist + and cl, 15 /* op &= 15 */ + jz L_check_dist_one + cmp bl, cl + jae L_add_bits_to_dist /* if (op <= bits) 97.6% */ + + mov ch, cl /* stash op in ch, freeing cl */ + xor eax, eax + lodsw /* al = *(ushort *)in++ */ + mov cl, bl /* cl = bits, needs it for shifting */ + add bl, 16 /* bits += 16 */ + shl eax, cl + or edx, eax /* hold |= *((ushort *)in)++ << bits */ + mov cl, ch /* move op back to ecx */ + +L_add_bits_to_dist: + sub bl, cl + xor eax, eax + inc eax + shl eax, cl + dec eax /* (1 << op) - 1 */ + and eax, edx /* eax &= hold */ + shr edx, cl + add ebp, eax /* dist += hold & ((1 << op) - 1) */ + +L_check_window: + mov [esp+8], esi /* save in so from can use it's reg */ + mov eax, edi + sub eax, [esp+20] /* nbytes = out - beg */ + + cmp eax, ebp + jb L_clip_window /* if (dist > nbytes) 4.2% */ + + mov ecx, [esp+64] /* ecx = len */ + mov esi, edi + sub esi, ebp /* from = out - dist */ + + sar ecx, 1 + jnc L_copy_two + + rep movsw + mov al, [esi] + mov [edi], al + inc edi + + mov esi, [esp+8] /* move in back to %esi, toss from */ + mov ebp, [esp+32] /* ebp = lcode */ + jmp L_while_test + +L_copy_two: + rep movsw + mov esi, [esp+8] /* move in back to %esi, toss from */ + mov ebp, [esp+32] /* ebp = lcode */ + jmp L_while_test + +ALIGN 4 +L_check_dist_one: + cmp ebp, 1 /* if dist 1, is a memset 
*/ + jne L_check_window + cmp [esp+20], edi + je L_check_window /* out == beg, if outside window */ + + mov ecx, [esp+64] /* ecx = len */ + mov al, [edi-1] + mov ah, al + + sar ecx, 1 + jnc L_set_two + mov [edi], al /* memset out with from[-1] */ + inc edi + +L_set_two: + rep stosw + mov ebp, [esp+32] /* ebp = lcode */ + jmp L_while_test + +ALIGN 4 +L_test_for_second_level_length: + test al, 64 + jnz L_test_for_end_of_block /* if ((op & 64) != 0) */ + + xor eax, eax + inc eax + shl eax, cl + dec eax + and eax, edx /* eax &= hold */ + add eax, [esp+64] /* eax += len */ + mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/ + jmp L_dolen + +ALIGN 4 +L_test_for_second_level_dist: + test al, 64 + jnz L_invalid_distance_code /* if ((op & 64) != 0) */ + + xor eax, eax + inc eax + shl eax, cl + dec eax + and eax, edx /* eax &= hold */ + add eax, ebp /* eax += dist */ + mov ecx, [esp+36] /* ecx = dcode */ + mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/ + jmp L_dodist + +ALIGN 4 +L_clip_window: + mov ecx, eax + mov eax, [esp+48] /* eax = wsize */ + neg ecx /* nbytes = -nbytes */ + mov esi, [esp+28] /* from = window */ + + cmp eax, ebp + jb L_invalid_distance_too_far /* if (dist > wsize) */ + + add ecx, ebp /* nbytes = dist - nbytes */ + cmp dword ptr [esp+52], 0 + jne L_wrap_around_window /* if (write != 0) */ + + sub eax, ecx + add esi, eax /* from += wsize - nbytes */ + + mov eax, [esp+64] /* eax = len */ + cmp eax, ecx + jbe L_do_copy /* if (nbytes >= len) */ + + sub eax, ecx /* len -= nbytes */ + rep movsb + mov esi, edi + sub esi, ebp /* from = out - dist */ + jmp L_do_copy + +ALIGN 4 +L_wrap_around_window: + mov eax, [esp+52] /* eax = write */ + cmp ecx, eax + jbe L_contiguous_in_window /* if (write >= nbytes) */ + + add esi, [esp+48] /* from += wsize */ + add esi, eax /* from += write */ + sub esi, ecx /* from -= nbytes */ + sub ecx, eax /* nbytes -= write */ + + mov eax, [esp+64] /* eax = len */ + cmp eax, ecx + jbe L_do_copy /* if (nbytes >= len) 
*/ + + sub eax, ecx /* len -= nbytes */ + rep movsb + mov esi, [esp+28] /* from = window */ + mov ecx, [esp+52] /* nbytes = write */ + cmp eax, ecx + jbe L_do_copy /* if (nbytes >= len) */ + + sub eax, ecx /* len -= nbytes */ + rep movsb + mov esi, edi + sub esi, ebp /* from = out - dist */ + jmp L_do_copy + +ALIGN 4 +L_contiguous_in_window: + add esi, eax + sub esi, ecx /* from += write - nbytes */ + + mov eax, [esp+64] /* eax = len */ + cmp eax, ecx + jbe L_do_copy /* if (nbytes >= len) */ + + sub eax, ecx /* len -= nbytes */ + rep movsb + mov esi, edi + sub esi, ebp /* from = out - dist */ + jmp L_do_copy + +ALIGN 4 +L_do_copy: + mov ecx, eax + rep movsb + + mov esi, [esp+8] /* move in back to %esi, toss from */ + mov ebp, [esp+32] /* ebp = lcode */ + jmp L_while_test + +L_test_for_end_of_block: + test al, 32 + jz L_invalid_literal_length_code + mov dword ptr [esp+72], 1 + jmp L_break_loop_with_status + +L_invalid_literal_length_code: + mov dword ptr [esp+72], 2 + jmp L_break_loop_with_status + +L_invalid_distance_code: + mov dword ptr [esp+72], 3 + jmp L_break_loop_with_status + +L_invalid_distance_too_far: + mov esi, [esp+4] + mov dword ptr [esp+72], 4 + jmp L_break_loop_with_status + +L_break_loop: + mov dword ptr [esp+72], 0 + +L_break_loop_with_status: +/* put in, out, bits, and hold back into ar and pop esp */ + mov [esp+8], esi /* save in */ + mov [esp+16], edi /* save out */ + mov [esp+44], ebx /* save bits */ + mov [esp+40], edx /* save hold */ + mov ebp, [esp+4] /* restore esp, ebp */ + mov esp, [esp] + } +#else +#error "x86 architecture not defined" +#endif + + if (ar.status > 1) { + if (ar.status == 2) + strm->msg = "invalid literal/length code"; + else if (ar.status == 3) + strm->msg = "invalid distance code"; + else + strm->msg = "invalid distance too far back"; + state->mode = BAD; + } + else if ( ar.status == 1 ) { + state->mode = TYPE; + } + + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + ar.len = ar.bits >> 3; + 
ar.in -= ar.len; + ar.bits -= ar.len << 3; + ar.hold &= (1U << ar.bits) - 1; + + /* update state and return */ + strm->next_in = ar.in; + strm->next_out = ar.out; + strm->avail_in = (unsigned)(ar.in < ar.last ? + PAD_AVAIL_IN + (ar.last - ar.in) : + PAD_AVAIL_IN - (ar.in - ar.last)); + strm->avail_out = (unsigned)(ar.out < ar.end ? + PAD_AVAIL_OUT + (ar.end - ar.out) : + PAD_AVAIL_OUT - (ar.out - ar.end)); + state->hold = ar.hold; + state->bits = ar.bits; + return; +} + Added: external/zlib/contrib/inflate86/inffast.S ============================================================================== --- (empty file) +++ external/zlib/contrib/inflate86/inffast.S Tue Jan 3 07:42:59 2006 @@ -0,0 +1,1368 @@ +/* + * inffast.S is a hand tuned assembler version of: + * + * inffast.c -- fast decoding + * Copyright (C) 1995-2003 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Copyright (C) 2003 Chris Anderson + * Please use the copyright conditions above. + * + * This version (Jan-23-2003) of inflate_fast was coded and tested under + * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution. On that + * machine, I found that gzip style archives decompressed about 20% faster than + * the gcc-3.2 -O3 -fomit-frame-pointer compiled version. Your results will + * depend on how large of a buffer is used for z_stream.next_in & next_out + * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in + * stream processing I/O and crc32/addler32. In my case, this routine used + * 70% of the cpu time and crc32 used 20%. + * + * I am confident that this version will work in the general case, but I have + * not tested a wide variety of datasets or a wide variety of platforms. + * + * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating. + * It should be a runtime flag instead of compile time flag... + * + * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction. 
+ * With -DUSE_MMX, only MMX code is compiled. With -DNO_MMX, only non-MMX code + * is compiled. Without either option, runtime detection is enabled. Runtime + * detection should work on all modern cpus and the recommended algorithm (flip + * ID bit on eflags and then use the cpuid instruction) is used in many + * multimedia applications. Tested under win2k with gcc-2.95 and gas-2.12 + * distributed with cygwin3. Compiling with gcc-2.95 -c inffast.S -o + * inffast.obj generates a COFF object which can then be linked with MSVC++ + * compiled code. Tested under FreeBSD 4.7 with gcc-2.95. + * + * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and + * slower than compiler generated code). Adjusted cpuid check to use the MMX + * code only for Pentiums < P4 until I have more data on the P4. Speed + * improvement is only about 15% on the Athlon when compared with code generated + * with MSVC++. Not sure yet, but I think the P4 will also be slower using the + * MMX mode because many of its x86 ALU instructions execute in .5 cycles and + * have less latency than MMX ops. Added code to buffer the last 11 bytes of + * the input stream since the MMX code grabs bits in chunks of 32, which + * differs from the inffast.c algorithm. I don't think there would have been + * read overruns where a page boundary was crossed (a segfault), but there + * could have been overruns when next_in ends on unaligned memory (uninitialized + * memory read). + * + * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX. I created a C + * version of the non-MMX code so that it doesn't depend on zstrm and zstate + * structure offsets which are hard coded in this file. 
This was last tested + * with zlib-1.2.0 which is currently in beta testing, newer versions of this + * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and + * http://www.charm.net/~christop/zlib/ + */ + + +/* + * if you have underscore linking problems (_inflate_fast undefined), try + * using -DGAS_COFF + */ +#if ! defined( GAS_COFF ) && ! defined( GAS_ELF ) + +#if defined( WIN32 ) || defined( __CYGWIN__ ) +#define GAS_COFF /* windows object format */ +#else +#define GAS_ELF +#endif + +#endif /* ! GAS_COFF && ! GAS_ELF */ + + +#if defined( GAS_COFF ) + +/* coff externals have underscores */ +#define inflate_fast _inflate_fast +#define inflate_fast_use_mmx _inflate_fast_use_mmx + +#endif /* GAS_COFF */ + + +.file "inffast.S" + +.globl inflate_fast + +.text +.align 4,0 +.L_invalid_literal_length_code_msg: +.string "invalid literal/length code" + +.align 4,0 +.L_invalid_distance_code_msg: +.string "invalid distance code" + +.align 4,0 +.L_invalid_distance_too_far_msg: +.string "invalid distance too far back" + +#if ! 
defined( NO_MMX ) +.align 4,0 +.L_mask: /* mask[N] = ( 1 << N ) - 1 */ +.long 0 +.long 1 +.long 3 +.long 7 +.long 15 +.long 31 +.long 63 +.long 127 +.long 255 +.long 511 +.long 1023 +.long 2047 +.long 4095 +.long 8191 +.long 16383 +.long 32767 +.long 65535 +.long 131071 +.long 262143 +.long 524287 +.long 1048575 +.long 2097151 +.long 4194303 +.long 8388607 +.long 16777215 +.long 33554431 +.long 67108863 +.long 134217727 +.long 268435455 +.long 536870911 +.long 1073741823 +.long 2147483647 +.long 4294967295 +#endif /* NO_MMX */ + +.text + +/* + * struct z_stream offsets, in zlib.h + */ +#define next_in_strm 0 /* strm->next_in */ +#define avail_in_strm 4 /* strm->avail_in */ +#define next_out_strm 12 /* strm->next_out */ +#define avail_out_strm 16 /* strm->avail_out */ +#define msg_strm 24 /* strm->msg */ +#define state_strm 28 /* strm->state */ + +/* + * struct inflate_state offsets, in inflate.h + */ +#define mode_state 0 /* state->mode */ +#define wsize_state 32 /* state->wsize */ +#define write_state 40 /* state->write */ +#define window_state 44 /* state->window */ +#define hold_state 48 /* state->hold */ +#define bits_state 52 /* state->bits */ +#define lencode_state 68 /* state->lencode */ +#define distcode_state 72 /* state->distcode */ +#define lenbits_state 76 /* state->lenbits */ +#define distbits_state 80 /* state->distbits */ + +/* + * inflate_fast's activation record + */ +#define local_var_size 64 /* how much local space for vars */ +#define strm_sp 88 /* first arg: z_stream * (local_var_size + 24) */ +#define start_sp 92 /* second arg: unsigned int (local_var_size + 28) */ + +/* + * offsets for local vars on stack + */ +#define out 60 /* unsigned char* */ +#define window 56 /* unsigned char* */ +#define wsize 52 /* unsigned int */ +#define write 48 /* unsigned int */ +#define in 44 /* unsigned char* */ +#define beg 40 /* unsigned char* */ +#define buf 28 /* char[ 12 ] */ +#define len 24 /* unsigned int */ +#define last 20 /* unsigned char* */ +#define 
end 16 /* unsigned char* */ +#define dcode 12 /* code* */ +#define lcode 8 /* code* */ +#define dmask 4 /* unsigned int */ +#define lmask 0 /* unsigned int */ + +/* + * typedef enum inflate_mode consts, in inflate.h + */ +#define INFLATE_MODE_TYPE 11 /* state->mode flags enum-ed in inflate.h */ +#define INFLATE_MODE_BAD 26 + + +#if ! defined( USE_MMX ) && ! defined( NO_MMX ) + +#define RUN_TIME_MMX + +#define CHECK_MMX 1 +#define DO_USE_MMX 2 +#define DONT_USE_MMX 3 + +.globl inflate_fast_use_mmx + +.data + +.align 4,0 +inflate_fast_use_mmx: /* integer flag for run time control 1=check,2=mmx,3=no */ +.long CHECK_MMX + +#if defined( GAS_ELF ) +/* elf info */ +.type inflate_fast_use_mmx, at object +.size inflate_fast_use_mmx,4 +#endif + +#endif /* RUN_TIME_MMX */ + +#if defined( GAS_COFF ) +/* coff info: scl 2 = extern, type 32 = function */ +.def inflate_fast; .scl 2; .type 32; .endef +#endif + +.text + +.align 32,0x90 +inflate_fast: + pushl %edi + pushl %esi + pushl %ebp + pushl %ebx + pushf /* save eflags (strm_sp, state_sp assumes this is 32 bits) */ + subl $local_var_size, %esp + cld + +#define strm_r %esi +#define state_r %edi + + movl strm_sp(%esp), strm_r + movl state_strm(strm_r), state_r + + /* in = strm->next_in; + * out = strm->next_out; + * last = in + strm->avail_in - 11; + * beg = out - (start - strm->avail_out); + * end = out + (strm->avail_out - 257); + */ + movl avail_in_strm(strm_r), %edx + movl next_in_strm(strm_r), %eax + + addl %eax, %edx /* avail_in += next_in */ + subl $11, %edx /* avail_in -= 11 */ + + movl %eax, in(%esp) + movl %edx, last(%esp) + + movl start_sp(%esp), %ebp + movl avail_out_strm(strm_r), %ecx + movl next_out_strm(strm_r), %ebx + + subl %ecx, %ebp /* start -= avail_out */ + negl %ebp /* start = -start */ + addl %ebx, %ebp /* start += next_out */ + + subl $257, %ecx /* avail_out -= 257 */ + addl %ebx, %ecx /* avail_out += out */ + + movl %ebx, out(%esp) + movl %ebp, beg(%esp) + movl %ecx, end(%esp) + + /* wsize = state->wsize; 
+ * write = state->write; + * window = state->window; + * hold = state->hold; + * bits = state->bits; + * lcode = state->lencode; + * dcode = state->distcode; + * lmask = ( 1 << state->lenbits ) - 1; + * dmask = ( 1 << state->distbits ) - 1; + */ + + movl lencode_state(state_r), %eax + movl distcode_state(state_r), %ecx + + movl %eax, lcode(%esp) + movl %ecx, dcode(%esp) + + movl $1, %eax + movl lenbits_state(state_r), %ecx + shll %cl, %eax + decl %eax + movl %eax, lmask(%esp) + + movl $1, %eax + movl distbits_state(state_r), %ecx + shll %cl, %eax + decl %eax + movl %eax, dmask(%esp) + + movl wsize_state(state_r), %eax + movl write_state(state_r), %ecx + movl window_state(state_r), %edx + + movl %eax, wsize(%esp) + movl %ecx, write(%esp) + movl %edx, window(%esp) + + movl hold_state(state_r), %ebp + movl bits_state(state_r), %ebx + +#undef strm_r +#undef state_r + +#define in_r %esi +#define from_r %esi +#define out_r %edi + + movl in(%esp), in_r + movl last(%esp), %ecx + cmpl in_r, %ecx + ja .L_align_long /* if in < last */ + + addl $11, %ecx /* ecx = &in[ avail_in ] */ + subl in_r, %ecx /* ecx = avail_in */ + movl $12, %eax + subl %ecx, %eax /* eax = 12 - avail_in */ + leal buf(%esp), %edi + rep movsb /* memcpy( buf, in, avail_in ) */ + movl %eax, %ecx + xorl %eax, %eax + rep stosb /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */ + leal buf(%esp), in_r /* in = buf */ + movl in_r, last(%esp) /* last = in, do just one iteration */ + jmp .L_is_aligned + + /* align in_r on long boundary */ +.L_align_long: + testl $3, in_r + jz .L_is_aligned + xorl %eax, %eax + movb (in_r), %al + incl in_r + movl %ebx, %ecx + addl $8, %ebx + shll %cl, %eax + orl %eax, %ebp + jmp .L_align_long + +.L_is_aligned: + movl out(%esp), out_r + +#if defined( NO_MMX ) + jmp .L_do_loop +#endif + +#if defined( USE_MMX ) + jmp .L_init_mmx +#endif + +/*** Runtime MMX check ***/ + +#if defined( RUN_TIME_MMX ) +.L_check_mmx: + cmpl $DO_USE_MMX, inflate_fast_use_mmx + je .L_init_mmx + ja .L_do_loop 
/* > 2 */ + + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + pushf + movl (%esp), %eax /* copy eflags to eax */ + xorl $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21) + * to see if cpu supports cpuid... + * ID bit method not supported by NexGen but + * bios may load a cpuid instruction and + * cpuid may be disabled on Cyrix 5-6x86 */ + popf + pushf + popl %edx /* copy new eflags to edx */ + xorl %eax, %edx /* test if ID bit is flipped */ + jz .L_dont_use_mmx /* not flipped if zero */ + xorl %eax, %eax + cpuid + cmpl $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */ + jne .L_dont_use_mmx + cmpl $0x6c65746e, %ecx + jne .L_dont_use_mmx + cmpl $0x49656e69, %edx + jne .L_dont_use_mmx + movl $1, %eax + cpuid /* get cpu features */ + shrl $8, %eax + andl $15, %eax + cmpl $6, %eax /* check for Pentium family, is 0xf for P4 */ + jne .L_dont_use_mmx + testl $0x800000, %edx /* test if MMX feature is set (bit 23) */ + jnz .L_use_mmx + jmp .L_dont_use_mmx +.L_use_mmx: + movl $DO_USE_MMX, inflate_fast_use_mmx + jmp .L_check_mmx_pop +.L_dont_use_mmx: + movl $DONT_USE_MMX, inflate_fast_use_mmx +.L_check_mmx_pop: + popl %edx + popl %ecx + popl %ebx + popl %eax + jmp .L_check_mmx +#endif + + +/*** Non-MMX code ***/ + +#if defined ( NO_MMX ) || defined( RUN_TIME_MMX ) + +#define hold_r %ebp +#define bits_r %bl +#define bitslong_r %ebx + +.align 32,0x90 +.L_while_test: + /* while (in < last && out < end) + */ + cmpl out_r, end(%esp) + jbe .L_break_loop /* if (out >= end) */ + + cmpl in_r, last(%esp) + jbe .L_break_loop + +.L_do_loop: + /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out + * + * do { + * if (bits < 15) { + * hold |= *((unsigned short *)in)++ << bits; + * bits += 16 + * } + * this = lcode[hold & lmask] + */ + cmpb $15, bits_r + ja .L_get_length_code /* if (15 < bits) */ + + xorl %eax, %eax + lodsw /* al = *(ushort *)in++ */ + movb bits_r, %cl /* cl = bits, needs it for shifting */ + addb $16, bits_r /* bits += 16 */ + shll %cl, %eax + 
orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ + +.L_get_length_code: + movl lmask(%esp), %edx /* edx = lmask */ + movl lcode(%esp), %ecx /* ecx = lcode */ + andl hold_r, %edx /* edx &= hold */ + movl (%ecx,%edx,4), %eax /* eax = lcode[hold & lmask] */ + +.L_dolen: + /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out + * + * dolen: + * bits -= this.bits; + * hold >>= this.bits + */ + movb %ah, %cl /* cl = this.bits */ + subb %ah, bits_r /* bits -= this.bits */ + shrl %cl, hold_r /* hold >>= this.bits */ + + /* check if op is a literal + * if (op == 0) { + * PUP(out) = this.val; + * } + */ + testb %al, %al + jnz .L_test_for_length_base /* if (op != 0) 45.7% */ + + shrl $16, %eax /* output this.val char */ + stosb + jmp .L_while_test + +.L_test_for_length_base: + /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len + * + * else if (op & 16) { + * len = this.val + * op &= 15 + * if (op) { + * if (op > bits) { + * hold |= *((unsigned short *)in)++ << bits; + * bits += 16 + * } + * len += hold & mask[op]; + * bits -= op; + * hold >>= op; + * } + */ +#define len_r %edx + movl %eax, len_r /* len = this */ + shrl $16, len_r /* len = this.val */ + movb %al, %cl + + testb $16, %al + jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */ + andb $15, %cl /* op &= 15 */ + jz .L_save_len /* if (!op) */ + cmpb %cl, bits_r + jae .L_add_bits_to_len /* if (op <= bits) */ + + movb %cl, %ch /* stash op in ch, freeing cl */ + xorl %eax, %eax + lodsw /* al = *(ushort *)in++ */ + movb bits_r, %cl /* cl = bits, needs it for shifting */ + addb $16, bits_r /* bits += 16 */ + shll %cl, %eax + orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ + movb %ch, %cl /* move op back to ecx */ + +.L_add_bits_to_len: + movl $1, %eax + shll %cl, %eax + decl %eax + subb %cl, bits_r + andl hold_r, %eax /* eax &= hold */ + shrl %cl, hold_r + addl %eax, len_r /* len += hold & mask[op] */ + +.L_save_len: + movl len_r, len(%esp) /* save len */ +#undef len_r + 
+.L_decode_distance: + /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist + * + * if (bits < 15) { + * hold |= *((unsigned short *)in)++ << bits; + * bits += 16 + * } + * this = dcode[hold & dmask]; + * dodist: + * bits -= this.bits; + * hold >>= this.bits; + * op = this.op; + */ + + cmpb $15, bits_r + ja .L_get_distance_code /* if (15 < bits) */ + + xorl %eax, %eax + lodsw /* al = *(ushort *)in++ */ + movb bits_r, %cl /* cl = bits, needs it for shifting */ + addb $16, bits_r /* bits += 16 */ + shll %cl, %eax + orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ + +.L_get_distance_code: + movl dmask(%esp), %edx /* edx = dmask */ + movl dcode(%esp), %ecx /* ecx = dcode */ + andl hold_r, %edx /* edx &= hold */ + movl (%ecx,%edx,4), %eax /* eax = dcode[hold & dmask] */ + +#define dist_r %edx +.L_dodist: + movl %eax, dist_r /* dist = this */ + shrl $16, dist_r /* dist = this.val */ + movb %ah, %cl + subb %ah, bits_r /* bits -= this.bits */ + shrl %cl, hold_r /* hold >>= this.bits */ + + /* if (op & 16) { + * dist = this.val + * op &= 15 + * if (op > bits) { + * hold |= *((unsigned short *)in)++ << bits; + * bits += 16 + * } + * dist += hold & mask[op]; + * bits -= op; + * hold >>= op; + */ + movb %al, %cl /* cl = this.op */ + + testb $16, %al /* if ((op & 16) == 0) */ + jz .L_test_for_second_level_dist + andb $15, %cl /* op &= 15 */ + jz .L_check_dist_one + cmpb %cl, bits_r + jae .L_add_bits_to_dist /* if (op <= bits) 97.6% */ + + movb %cl, %ch /* stash op in ch, freeing cl */ + xorl %eax, %eax + lodsw /* al = *(ushort *)in++ */ + movb bits_r, %cl /* cl = bits, needs it for shifting */ + addb $16, bits_r /* bits += 16 */ + shll %cl, %eax + orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ + movb %ch, %cl /* move op back to ecx */ + +.L_add_bits_to_dist: + movl $1, %eax + shll %cl, %eax + decl %eax /* (1 << op) - 1 */ + subb %cl, bits_r + andl hold_r, %eax /* eax &= hold */ + shrl %cl, hold_r + addl %eax, dist_r /* dist += hold & ((1 << op) 
- 1) */ + jmp .L_check_window + +.L_check_window: + /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist + * %ecx = nbytes + * + * nbytes = out - beg; + * if (dist <= nbytes) { + * from = out - dist; + * do { + * PUP(out) = PUP(from); + * } while (--len > 0); + * } + */ + + movl in_r, in(%esp) /* save in so from can use its reg */ + movl out_r, %eax + subl beg(%esp), %eax /* nbytes = out - beg */ + + cmpl dist_r, %eax + jb .L_clip_window /* if (dist > nbytes) 4.2% */ + + movl len(%esp), %ecx + movl out_r, from_r + subl dist_r, from_r /* from = out - dist */ + + subl $3, %ecx + movb (from_r), %al + movb %al, (out_r) + movb 1(from_r), %al + movb 2(from_r), %dl + addl $3, from_r + movb %al, 1(out_r) + movb %dl, 2(out_r) + addl $3, out_r + rep movsb + + movl in(%esp), in_r /* move in back to %esi, toss from */ + jmp .L_while_test + +.align 16,0x90 +.L_check_dist_one: + cmpl $1, dist_r + jne .L_check_window + cmpl out_r, beg(%esp) + je .L_check_window + + decl out_r + movl len(%esp), %ecx + movb (out_r), %al + subl $3, %ecx + + movb %al, 1(out_r) + movb %al, 2(out_r) + movb %al, 3(out_r) + addl $4, out_r + rep stosb + + jmp .L_while_test + +.align 16,0x90 +.L_test_for_second_level_length: + /* else if ((op & 64) == 0) { + * this = lcode[this.val + (hold & mask[op])]; + * } + */ + testb $64, %al + jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */ + + movl $1, %eax + shll %cl, %eax + decl %eax + andl hold_r, %eax /* eax &= hold */ + addl %edx, %eax /* eax += this.val */ + movl lcode(%esp), %edx /* edx = lcode */ + movl (%edx,%eax,4), %eax /* eax = lcode[val + (hold&mask[op])] */ + jmp .L_dolen + +.align 16,0x90 +.L_test_for_second_level_dist: + /* else if ((op & 64) == 0) { + * this = dcode[this.val + (hold & mask[op])]; + * } + */ + testb $64, %al + jnz .L_invalid_distance_code /* if ((op & 64) != 0) */ + + movl $1, %eax + shll %cl, %eax + decl %eax + andl hold_r, %eax /* eax &= hold */ + addl %edx, %eax /* eax += this.val */ + movl dcode(%esp), 
%edx /* edx = dcode */ + movl (%edx,%eax,4), %eax /* eax = dcode[val + (hold&mask[op])] */ + jmp .L_dodist + +.align 16,0x90 +.L_clip_window: + /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist + * %ecx = nbytes + * + * else { + * if (dist > wsize) { + * invalid distance + * } + * from = window; + * nbytes = dist - nbytes; + * if (write == 0) { + * from += wsize - nbytes; + */ +#define nbytes_r %ecx + movl %eax, nbytes_r + movl wsize(%esp), %eax /* prepare for dist compare */ + negl nbytes_r /* nbytes = -nbytes */ + movl window(%esp), from_r /* from = window */ + + cmpl dist_r, %eax + jb .L_invalid_distance_too_far /* if (dist > wsize) */ + + addl dist_r, nbytes_r /* nbytes = dist - nbytes */ + cmpl $0, write(%esp) + jne .L_wrap_around_window /* if (write != 0) */ + + subl nbytes_r, %eax + addl %eax, from_r /* from += wsize - nbytes */ + + /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist + * %ecx = nbytes, %eax = len + * + * if (nbytes < len) { + * len -= nbytes; + * do { + * PUP(out) = PUP(from); + * } while (--nbytes); + * from = out - dist; + * } + * } + */ +#define len_r %eax + movl len(%esp), len_r + cmpl nbytes_r, len_r + jbe .L_do_copy1 /* if (nbytes >= len) */ + + subl nbytes_r, len_r /* len -= nbytes */ + rep movsb + movl out_r, from_r + subl dist_r, from_r /* from = out - dist */ + jmp .L_do_copy1 + + cmpl nbytes_r, len_r + jbe .L_do_copy1 /* if (nbytes >= len) */ + + subl nbytes_r, len_r /* len -= nbytes */ + rep movsb + movl out_r, from_r + subl dist_r, from_r /* from = out - dist */ + jmp .L_do_copy1 + +.L_wrap_around_window: + /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist + * %ecx = nbytes, %eax = write, %eax = len + * + * else if (write < nbytes) { + * from += wsize + write - nbytes; + * nbytes -= write; + * if (nbytes < len) { + * len -= nbytes; + * do { + * PUP(out) = PUP(from); + * } while (--nbytes); + * from = window; + * nbytes = write; + * if (nbytes < len) { + * len -= 
nbytes; + * do { + * PUP(out) = PUP(from); + * } while(--nbytes); + * from = out - dist; + * } + * } + * } + */ +#define write_r %eax + movl write(%esp), write_r + cmpl write_r, nbytes_r + jbe .L_contiguous_in_window /* if (write >= nbytes) */ + + addl wsize(%esp), from_r + addl write_r, from_r + subl nbytes_r, from_r /* from += wsize + write - nbytes */ + subl write_r, nbytes_r /* nbytes -= write */ +#undef write_r + + movl len(%esp), len_r + cmpl nbytes_r, len_r + jbe .L_do_copy1 /* if (nbytes >= len) */ + + subl nbytes_r, len_r /* len -= nbytes */ + rep movsb + movl window(%esp), from_r /* from = window */ + movl write(%esp), nbytes_r /* nbytes = write */ + cmpl nbytes_r, len_r + jbe .L_do_copy1 /* if (nbytes >= len) */ + + subl nbytes_r, len_r /* len -= nbytes */ + rep movsb + movl out_r, from_r + subl dist_r, from_r /* from = out - dist */ + jmp .L_do_copy1 + +.L_contiguous_in_window: + /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist + * %ecx = nbytes, %eax = write, %eax = len + * + * else { + * from += write - nbytes; + * if (nbytes < len) { + * len -= nbytes; + * do { + * PUP(out) = PUP(from); + * } while (--nbytes); + * from = out - dist; + * } + * } + */ +#define write_r %eax + addl write_r, from_r + subl nbytes_r, from_r /* from += write - nbytes */ +#undef write_r + + movl len(%esp), len_r + cmpl nbytes_r, len_r + jbe .L_do_copy1 /* if (nbytes >= len) */ + + subl nbytes_r, len_r /* len -= nbytes */ + rep movsb + movl out_r, from_r + subl dist_r, from_r /* from = out - dist */ + +.L_do_copy1: + /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out + * %eax = len + * + * while (len > 0) { + * PUP(out) = PUP(from); + * len--; + * } + * } + * } while (in < last && out < end); + */ +#undef nbytes_r +#define in_r %esi + movl len_r, %ecx + rep movsb + + movl in(%esp), in_r /* move in back to %esi, toss from */ + jmp .L_while_test + +#undef len_r +#undef dist_r + +#endif /* NO_MMX || RUN_TIME_MMX */ + + +/*** MMX code ***/ + 
+#if defined( USE_MMX ) || defined( RUN_TIME_MMX ) + +.align 32,0x90 +.L_init_mmx: + emms + +#undef bits_r +#undef bitslong_r +#define bitslong_r %ebp +#define hold_mm %mm0 + movd %ebp, hold_mm + movl %ebx, bitslong_r + +#define used_mm %mm1 +#define dmask2_mm %mm2 +#define lmask2_mm %mm3 +#define lmask_mm %mm4 +#define dmask_mm %mm5 +#define tmp_mm %mm6 + + movd lmask(%esp), lmask_mm + movq lmask_mm, lmask2_mm + movd dmask(%esp), dmask_mm + movq dmask_mm, dmask2_mm + pxor used_mm, used_mm + movl lcode(%esp), %ebx /* ebx = lcode */ + jmp .L_do_loop_mmx + +.align 32,0x90 +.L_while_test_mmx: + /* while (in < last && out < end) + */ + cmpl out_r, end(%esp) + jbe .L_break_loop /* if (out >= end) */ + + cmpl in_r, last(%esp) + jbe .L_break_loop + +.L_do_loop_mmx: + psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ + + cmpl $32, bitslong_r + ja .L_get_length_code_mmx /* if (32 < bits) */ + + movd bitslong_r, tmp_mm + movd (in_r), %mm7 + addl $4, in_r + psllq tmp_mm, %mm7 + addl $32, bitslong_r + por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */ + +.L_get_length_code_mmx: + pand hold_mm, lmask_mm + movd lmask_mm, %eax + movq lmask2_mm, lmask_mm + movl (%ebx,%eax,4), %eax /* eax = lcode[hold & lmask] */ + +.L_dolen_mmx: + movzbl %ah, %ecx /* ecx = this.bits */ + movd %ecx, used_mm + subl %ecx, bitslong_r /* bits -= this.bits */ + + testb %al, %al + jnz .L_test_for_length_base_mmx /* if (op != 0) 45.7% */ + + shrl $16, %eax /* output this.val char */ + stosb + jmp .L_while_test_mmx + +.L_test_for_length_base_mmx: +#define len_r %edx + movl %eax, len_r /* len = this */ + shrl $16, len_r /* len = this.val */ + + testb $16, %al + jz .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */ + andl $15, %eax /* op &= 15 */ + jz .L_decode_distance_mmx /* if (!op) */ + + psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ + movd %eax, used_mm + movd hold_mm, %ecx + subl %eax, bitslong_r + andl .L_mask(,%eax,4), %ecx + addl %ecx, len_r /* len += hold & 
mask[op] */ + +.L_decode_distance_mmx: + psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ + + cmpl $32, bitslong_r + ja .L_get_dist_code_mmx /* if (32 < bits) */ + + movd bitslong_r, tmp_mm + movd (in_r), %mm7 + addl $4, in_r + psllq tmp_mm, %mm7 + addl $32, bitslong_r + por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */ + +.L_get_dist_code_mmx: + movl dcode(%esp), %ebx /* ebx = dcode */ + pand hold_mm, dmask_mm + movd dmask_mm, %eax + movq dmask2_mm, dmask_mm + movl (%ebx,%eax,4), %eax /* eax = dcode[hold & lmask] */ + +.L_dodist_mmx: +#define dist_r %ebx + movzbl %ah, %ecx /* ecx = this.bits */ + movl %eax, dist_r + shrl $16, dist_r /* dist = this.val */ + subl %ecx, bitslong_r /* bits -= this.bits */ + movd %ecx, used_mm + + testb $16, %al /* if ((op & 16) == 0) */ + jz .L_test_for_second_level_dist_mmx + andl $15, %eax /* op &= 15 */ + jz .L_check_dist_one_mmx + +.L_add_bits_to_dist_mmx: + psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ + movd %eax, used_mm /* save bit length of current op */ + movd hold_mm, %ecx /* get the next bits on input stream */ + subl %eax, bitslong_r /* bits -= op bits */ + andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */ + addl %ecx, dist_r /* dist += hold & mask[op] */ + +.L_check_window_mmx: + movl in_r, in(%esp) /* save in so from can use it's reg */ + movl out_r, %eax + subl beg(%esp), %eax /* nbytes = out - beg */ + + cmpl dist_r, %eax + jb .L_clip_window_mmx /* if (dist > nbytes) 4.2% */ + + movl len_r, %ecx + movl out_r, from_r + subl dist_r, from_r /* from = out - dist */ + + subl $3, %ecx + movb (from_r), %al + movb %al, (out_r) + movb 1(from_r), %al + movb 2(from_r), %dl + addl $3, from_r + movb %al, 1(out_r) + movb %dl, 2(out_r) + addl $3, out_r + rep movsb + + movl in(%esp), in_r /* move in back to %esi, toss from */ + movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */ + jmp .L_while_test_mmx + +.align 16,0x90 +.L_check_dist_one_mmx: + cmpl $1, dist_r + jne .L_check_window_mmx + 
cmpl out_r, beg(%esp) + je .L_check_window_mmx + + decl out_r + movl len_r, %ecx + movb (out_r), %al + subl $3, %ecx + + movb %al, 1(out_r) + movb %al, 2(out_r) + movb %al, 3(out_r) + addl $4, out_r + rep stosb + + movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */ + jmp .L_while_test_mmx + +.align 16,0x90 +.L_test_for_second_level_length_mmx: + testb $64, %al + jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */ + + andl $15, %eax + psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ + movd hold_mm, %ecx + andl .L_mask(,%eax,4), %ecx + addl len_r, %ecx + movl (%ebx,%ecx,4), %eax /* eax = lcode[val + (hold&mask[op])] */ + jmp .L_dolen_mmx + +.align 16,0x90 +.L_test_for_second_level_dist_mmx: + testb $64, %al + jnz .L_invalid_distance_code /* if ((op & 64) != 0) */ + + andl $15, %eax + psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ + movd hold_mm, %ecx + andl .L_mask(,%eax,4), %ecx + movl dcode(%esp), %eax /* eax = dcode */ + addl dist_r, %ecx + movl (%eax,%ecx,4), %eax /* eax = dcode[val + (hold&mask[op])] */ + jmp .L_dodist_mmx + +.align 16,0x90 +.L_clip_window_mmx: +#define nbytes_r %ecx + movl %eax, nbytes_r + movl wsize(%esp), %eax /* prepare for dist compare */ + negl nbytes_r /* nbytes = -nbytes */ + movl window(%esp), from_r /* from = window */ + + cmpl dist_r, %eax + jb .L_invalid_distance_too_far /* if (dist > wsize) */ + + addl dist_r, nbytes_r /* nbytes = dist - nbytes */ + cmpl $0, write(%esp) + jne .L_wrap_around_window_mmx /* if (write != 0) */ + + subl nbytes_r, %eax + addl %eax, from_r /* from += wsize - nbytes */ + + cmpl nbytes_r, len_r + jbe .L_do_copy1_mmx /* if (nbytes >= len) */ + + subl nbytes_r, len_r /* len -= nbytes */ + rep movsb + movl out_r, from_r + subl dist_r, from_r /* from = out - dist */ + jmp .L_do_copy1_mmx + + cmpl nbytes_r, len_r + jbe .L_do_copy1_mmx /* if (nbytes >= len) */ + + subl nbytes_r, len_r /* len -= nbytes */ + rep movsb + movl out_r, from_r + subl dist_r, from_r /* from = out - dist */ + jmp 
.L_do_copy1_mmx + +.L_wrap_around_window_mmx: +#define write_r %eax + movl write(%esp), write_r + cmpl write_r, nbytes_r + jbe .L_contiguous_in_window_mmx /* if (write >= nbytes) */ + + addl wsize(%esp), from_r + addl write_r, from_r + subl nbytes_r, from_r /* from += wsize + write - nbytes */ + subl write_r, nbytes_r /* nbytes -= write */ +#undef write_r + + cmpl nbytes_r, len_r + jbe .L_do_copy1_mmx /* if (nbytes >= len) */ + + subl nbytes_r, len_r /* len -= nbytes */ + rep movsb + movl window(%esp), from_r /* from = window */ + movl write(%esp), nbytes_r /* nbytes = write */ + cmpl nbytes_r, len_r + jbe .L_do_copy1_mmx /* if (nbytes >= len) */ + + subl nbytes_r, len_r /* len -= nbytes */ + rep movsb + movl out_r, from_r + subl dist_r, from_r /* from = out - dist */ + jmp .L_do_copy1_mmx + +.L_contiguous_in_window_mmx: +#define write_r %eax + addl write_r, from_r + subl nbytes_r, from_r /* from += write - nbytes */ +#undef write_r + + cmpl nbytes_r, len_r + jbe .L_do_copy1_mmx /* if (nbytes >= len) */ + + subl nbytes_r, len_r /* len -= nbytes */ + rep movsb + movl out_r, from_r + subl dist_r, from_r /* from = out - dist */ + +.L_do_copy1_mmx: +#undef nbytes_r +#define in_r %esi + movl len_r, %ecx + rep movsb + + movl in(%esp), in_r /* move in back to %esi, toss from */ + movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */ + jmp .L_while_test_mmx + +#undef hold_r +#undef bitslong_r + +#endif /* USE_MMX || RUN_TIME_MMX */ + + +/*** USE_MMX, NO_MMX, and RUNTIME_MMX from here on ***/ + +.L_invalid_distance_code: + /* else { + * strm->msg = "invalid distance code"; + * state->mode = BAD; + * } + */ + movl $.L_invalid_distance_code_msg, %ecx + movl $INFLATE_MODE_BAD, %edx + jmp .L_update_stream_state + +.L_test_for_end_of_block: + /* else if (op & 32) { + * state->mode = TYPE; + * break; + * } + */ + testb $32, %al + jz .L_invalid_literal_length_code /* if ((op & 32) == 0) */ + + movl $0, %ecx + movl $INFLATE_MODE_TYPE, %edx + jmp .L_update_stream_state + 
+.L_invalid_literal_length_code: + /* else { + * strm->msg = "invalid literal/length code"; + * state->mode = BAD; + * } + */ + movl $.L_invalid_literal_length_code_msg, %ecx + movl $INFLATE_MODE_BAD, %edx + jmp .L_update_stream_state + +.L_invalid_distance_too_far: + /* strm->msg = "invalid distance too far back"; + * state->mode = BAD; + */ + movl in(%esp), in_r /* from_r has in's reg, put in back */ + movl $.L_invalid_distance_too_far_msg, %ecx + movl $INFLATE_MODE_BAD, %edx + jmp .L_update_stream_state + +.L_update_stream_state: + /* set strm->msg = %ecx, strm->state->mode = %edx */ + movl strm_sp(%esp), %eax + testl %ecx, %ecx /* if (msg != NULL) */ + jz .L_skip_msg + movl %ecx, msg_strm(%eax) /* strm->msg = msg */ +.L_skip_msg: + movl state_strm(%eax), %eax /* state = strm->state */ + movl %edx, mode_state(%eax) /* state->mode = edx (BAD | TYPE) */ + jmp .L_break_loop + +.align 32,0x90 +.L_break_loop: + +/* + * Regs: + * + * bits = %ebp when mmx, and in %ebx when non-mmx + * hold = %hold_mm when mmx, and in %ebp when non-mmx + * in = %esi + * out = %edi + */ + +#if defined( USE_MMX ) || defined( RUN_TIME_MMX ) + +#if defined( RUN_TIME_MMX ) + + cmpl $DO_USE_MMX, inflate_fast_use_mmx + jne .L_update_next_in + +#endif /* RUN_TIME_MMX */ + + movl %ebp, %ebx + +.L_update_next_in: + +#endif + +#define strm_r %eax +#define state_r %edx + + /* len = bits >> 3; + * in -= len; + * bits -= len << 3; + * hold &= (1U << bits) - 1; + * state->hold = hold; + * state->bits = bits; + * strm->next_in = in; + * strm->next_out = out; + */ + movl strm_sp(%esp), strm_r + movl %ebx, %ecx + movl state_strm(strm_r), state_r + shrl $3, %ecx + subl %ecx, in_r + shll $3, %ecx + subl %ecx, %ebx + movl out_r, next_out_strm(strm_r) + movl %ebx, bits_state(state_r) + movl %ebx, %ecx + + leal buf(%esp), %ebx + cmpl %ebx, last(%esp) + jne .L_buf_not_used /* if buf != last */ + + subl %ebx, in_r /* in -= buf */ + movl next_in_strm(strm_r), %ebx + movl %ebx, last(%esp) /* last = strm->next_in 
*/ + addl %ebx, in_r /* in += strm->next_in */ + movl avail_in_strm(strm_r), %ebx + subl $11, %ebx + addl %ebx, last(%esp) /* last = &strm->next_in[ avail_in - 11 ] */ + +.L_buf_not_used: + movl in_r, next_in_strm(strm_r) + + movl $1, %ebx + shll %cl, %ebx + decl %ebx + +#if defined( USE_MMX ) || defined( RUN_TIME_MMX ) + +#if defined( RUN_TIME_MMX ) + + cmpl $DO_USE_MMX, inflate_fast_use_mmx + jne .L_update_hold + +#endif /* RUN_TIME_MMX */ + + psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ + movd hold_mm, %ebp + + emms + +.L_update_hold: + +#endif /* USE_MMX || RUN_TIME_MMX */ + + andl %ebx, %ebp + movl %ebp, hold_state(state_r) + +#define last_r %ebx + + /* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last) */ + movl last(%esp), last_r + cmpl in_r, last_r + jbe .L_last_is_smaller /* if (in >= last) */ + + subl in_r, last_r /* last -= in */ + addl $11, last_r /* last += 11 */ + movl last_r, avail_in_strm(strm_r) + jmp .L_fixup_out +.L_last_is_smaller: + subl last_r, in_r /* in -= last */ + negl in_r /* in = -in */ + addl $11, in_r /* in += 11 */ + movl in_r, avail_in_strm(strm_r) + +#undef last_r +#define end_r %ebx + +.L_fixup_out: + /* strm->avail_out = out < end ? 
257 + (end - out) : 257 - (out - end)*/ + movl end(%esp), end_r + cmpl out_r, end_r + jbe .L_end_is_smaller /* if (out >= end) */ + + subl out_r, end_r /* end -= out */ + addl $257, end_r /* end += 257 */ + movl end_r, avail_out_strm(strm_r) + jmp .L_done +.L_end_is_smaller: + subl end_r, out_r /* out -= end */ + negl out_r /* out = -out */ + addl $257, out_r /* out += 257 */ + movl out_r, avail_out_strm(strm_r) + +#undef end_r +#undef strm_r +#undef state_r + +.L_done: + addl $local_var_size, %esp + popf + popl %ebx + popl %ebp + popl %esi + popl %edi + ret + +#if defined( GAS_ELF ) +/* elf info */ +.type inflate_fast,@function +.size inflate_fast,.-inflate_fast +#endif Added: external/zlib/contrib/iostream/test.cpp ============================================================================== --- (empty file) +++ external/zlib/contrib/iostream/test.cpp Tue Jan 3 07:42:59 2006 @@ -0,0 +1,24 @@ + +#include "zfstream.h" + +int main() { + + // Construct a stream object with this filebuffer. Anything sent + // to this stream will go to standard out. + gzofstream os( 1, ios::out ); + + // This text is getting compressed and sent to stdout. + // To prove this, run 'test | zcat'. + os << "Hello, Mommy" << endl; + + os << setcompressionlevel( Z_NO_COMPRESSION ); + os << "hello, hello, hi, ho!" 
<< endl; + + setcompressionlevel( os, Z_DEFAULT_COMPRESSION ) + << "I'm compressing again" << endl; + + os.close(); + + return 0; + +} Added: external/zlib/contrib/iostream/zfstream.cpp ============================================================================== --- (empty file) +++ external/zlib/contrib/iostream/zfstream.cpp Tue Jan 3 07:42:59 2006 @@ -0,0 +1,329 @@ + +#include "zfstream.h" + +gzfilebuf::gzfilebuf() : + file(NULL), + mode(0), + own_file_descriptor(0) +{ } + +gzfilebuf::~gzfilebuf() { + + sync(); + if ( own_file_descriptor ) + close(); + +} + +gzfilebuf *gzfilebuf::open( const char *name, + int io_mode ) { + + if ( is_open() ) + return NULL; + + char char_mode[10]; + char *p = char_mode; + + if ( io_mode & ios::in ) { + mode = ios::in; + *p++ = 'r'; + } else if ( io_mode & ios::app ) { + mode = ios::app; + *p++ = 'a'; + } else { + mode = ios::out; + *p++ = 'w'; + } + + if ( io_mode & ios::binary ) { + mode |= ios::binary; + *p++ = 'b'; + } + + // Hard code the compression level + if ( io_mode & (ios::out|ios::app )) { + *p++ = '9'; + } + + // Put the end-of-string indicator + *p = '\0'; + + if ( (file = gzopen(name, char_mode)) == NULL ) + return NULL; + + own_file_descriptor = 1; + + return this; + +} + +gzfilebuf *gzfilebuf::attach( int file_descriptor, + int io_mode ) { + + if ( is_open() ) + return NULL; + + char char_mode[10]; + char *p = char_mode; + + if ( io_mode & ios::in ) { + mode = ios::in; + *p++ = 'r'; + } else if ( io_mode & ios::app ) { + mode = ios::app; + *p++ = 'a'; + } else { + mode = ios::out; + *p++ = 'w'; + } + + if ( io_mode & ios::binary ) { + mode |= ios::binary; + *p++ = 'b'; + } + + // Hard code the compression level + if ( io_mode & (ios::out|ios::app )) { + *p++ = '9'; + } + + // Put the end-of-string indicator + *p = '\0'; + + if ( (file = gzdopen(file_descriptor, char_mode)) == NULL ) + return NULL; + + own_file_descriptor = 0; + + return this; + +} + +gzfilebuf *gzfilebuf::close() { + + if ( is_open() ) { + + 
sync(); + gzclose( file ); + file = NULL; + + } + + return this; + +} + +int gzfilebuf::setcompressionlevel( int comp_level ) { + + return gzsetparams(file, comp_level, -2); + +} + +int gzfilebuf::setcompressionstrategy( int comp_strategy ) { + + return gzsetparams(file, -2, comp_strategy); + +} + + +streampos gzfilebuf::seekoff( streamoff off, ios::seek_dir dir, int which ) { + + return streampos(EOF); + +} + +int gzfilebuf::underflow() { + + // If the file hasn't been opened for reading, error. + if ( !is_open() || !(mode & ios::in) ) + return EOF; + + // If a buffer doesn't exist, allocate one. + if ( !base() ) { + + if ( (allocate()) == EOF ) + return EOF; + setp(0,0); + + } else { + + if ( in_avail() ) + return (unsigned char) *gptr(); + + if ( out_waiting() ) { + if ( flushbuf() == EOF ) + return EOF; + } + + } + + // Attempt to fill the buffer. + + int result = fillbuf(); + if ( result == EOF ) { + // disable get area + setg(0,0,0); + return EOF; + } + + return (unsigned char) *gptr(); + +} + +int gzfilebuf::overflow( int c ) { + + if ( !is_open() || !(mode & ios::out) ) + return EOF; + + if ( !base() ) { + if ( allocate() == EOF ) + return EOF; + setg(0,0,0); + } else { + if (in_avail()) { + return EOF; + } + if (out_waiting()) { + if (flushbuf() == EOF) + return EOF; + } + } + + int bl = blen(); + setp( base(), base() + bl); + + if ( c != EOF ) { + + *pptr() = c; + pbump(1); + + } + + return 0; + +} + +int gzfilebuf::sync() { + + if ( !is_open() ) + return EOF; + + if ( out_waiting() ) + return flushbuf(); + + return 0; + +} + +int gzfilebuf::flushbuf() { + + int n; + char *q; + + q = pbase(); + n = pptr() - q; + + if ( gzwrite( file, q, n) < n ) + return EOF; + + setp(0,0); + + return 0; + +} + +int gzfilebuf::fillbuf() { + + int required; + char *p; + + p = base(); + + required = blen(); + + int t = gzread( file, p, required ); + + if ( t <= 0) return EOF; + + setg( base(), base(), base()+t); + + return t; + +} + 
+gzfilestream_common::gzfilestream_common() : + ios( gzfilestream_common::rdbuf() ) +{ } + +gzfilestream_common::~gzfilestream_common() +{ } + +void gzfilestream_common::attach( int fd, int io_mode ) { + + if ( !buffer.attach( fd, io_mode) ) + clear( ios::failbit | ios::badbit ); + else + clear(); + +} + +void gzfilestream_common::open( const char *name, int io_mode ) { + + if ( !buffer.open( name, io_mode ) ) + clear( ios::failbit | ios::badbit ); + else + clear(); + +} + +void gzfilestream_common::close() { + + if ( !buffer.close() ) + clear( ios::failbit | ios::badbit ); + +} + +gzfilebuf *gzfilestream_common::rdbuf() +{ + return &buffer; +} + +gzifstream::gzifstream() : + ios( gzfilestream_common::rdbuf() ) +{ + clear( ios::badbit ); +} + +gzifstream::gzifstream( const char *name, int io_mode ) : + ios( gzfilestream_common::rdbuf() ) +{ + gzfilestream_common::open( name, io_mode ); +} + +gzifstream::gzifstream( int fd, int io_mode ) : + ios( gzfilestream_common::rdbuf() ) +{ + gzfilestream_common::attach( fd, io_mode ); +} + +gzifstream::~gzifstream() { } + +gzofstream::gzofstream() : + ios( gzfilestream_common::rdbuf() ) +{ + clear( ios::badbit ); +} + +gzofstream::gzofstream( const char *name, int io_mode ) : + ios( gzfilestream_common::rdbuf() ) +{ + gzfilestream_common::open( name, io_mode ); +} + +gzofstream::gzofstream( int fd, int io_mode ) : + ios( gzfilestream_common::rdbuf() ) +{ + gzfilestream_common::attach( fd, io_mode ); +} + +gzofstream::~gzofstream() { } Added: external/zlib/contrib/iostream/zfstream.h ============================================================================== --- (empty file) +++ external/zlib/contrib/iostream/zfstream.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,128 @@ + +#ifndef zfstream_h +#define zfstream_h + +#include +#include "zlib.h" + +class gzfilebuf : public streambuf { + +public: + + gzfilebuf( ); + virtual ~gzfilebuf(); + + gzfilebuf *open( const char *name, int io_mode ); + gzfilebuf *attach( int file_descriptor, int 
io_mode ); + gzfilebuf *close(); + + int setcompressionlevel( int comp_level ); + int setcompressionstrategy( int comp_strategy ); + + inline int is_open() const { return (file !=NULL); } + + virtual streampos seekoff( streamoff, ios::seek_dir, int ); + + virtual int sync(); + +protected: + + virtual int underflow(); + virtual int overflow( int = EOF ); + +private: + + gzFile file; + short mode; + short own_file_descriptor; + + int flushbuf(); + int fillbuf(); + +}; + +class gzfilestream_common : virtual public ios { + + friend class gzifstream; + friend class gzofstream; + friend gzofstream &setcompressionlevel( gzofstream &, int ); + friend gzofstream &setcompressionstrategy( gzofstream &, int ); + +public: + virtual ~gzfilestream_common(); + + void attach( int fd, int io_mode ); + void open( const char *name, int io_mode ); + void close(); + +protected: + gzfilestream_common(); + +private: + gzfilebuf *rdbuf(); + + gzfilebuf buffer; + +}; + +class gzifstream : public gzfilestream_common, public istream { + +public: + + gzifstream(); + gzifstream( const char *name, int io_mode = ios::in ); + gzifstream( int fd, int io_mode = ios::in ); + + virtual ~gzifstream(); + +}; + +class gzofstream : public gzfilestream_common, public ostream { + +public: + + gzofstream(); + gzofstream( const char *name, int io_mode = ios::out ); + gzofstream( int fd, int io_mode = ios::out ); + + virtual ~gzofstream(); + +}; + +template<class T> class gzomanip { + friend gzofstream &operator<<(gzofstream &, const gzomanip<T> &); +public: + gzomanip(gzofstream &(*f)(gzofstream &, T), T v) : func(f), val(v) { } +private: + gzofstream &(*func)(gzofstream &, T); + T val; +}; + +template<class T> gzofstream &operator<<(gzofstream &s, const gzomanip<T> &m) +{ + return (*m.func)(s, m.val); +} + +inline gzofstream &setcompressionlevel( gzofstream &s, int l ) +{ + (s.rdbuf())->setcompressionlevel(l); + return s; +} + +inline gzofstream &setcompressionstrategy( gzofstream &s, int l ) +{ + 
(s.rdbuf())->setcompressionstrategy(l); + return s; +} + +inline gzomanip<int> setcompressionlevel(int l) +{ + return gzomanip<int>(&setcompressionlevel,l); +} + +inline gzomanip<int> setcompressionstrategy(int l) +{ + return gzomanip<int>(&setcompressionstrategy,l); +} + +#endif Added: external/zlib/contrib/iostream2/zstream.h ============================================================================== --- (empty file) +++ external/zlib/contrib/iostream2/zstream.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,307 @@ +/* + * + * Copyright (c) 1997 + * Christian Michelsen Research AS + * Advanced Computing + * Fantoftvegen 38, 5036 BERGEN, Norway + * http://www.cmr.no + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Christian Michelsen Research AS makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. 
+ * + */ + +#ifndef ZSTREAM__H +#define ZSTREAM__H + +/* + * zstream.h - C++ interface to the 'zlib' general purpose compression library + * $Id: zstream.h 1.1 1997-06-25 12:00:56+02 tyge Exp tyge $ + */ + +#include +#include +#include +#include "zlib.h" + +#if defined(_WIN32) +# include +# include +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif + +class zstringlen { +public: + zstringlen(class izstream&); + zstringlen(class ozstream&, const char*); + size_t value() const { return val.word; } +private: + struct Val { unsigned char byte; size_t word; } val; +}; + +// ----------------------------- izstream ----------------------------- + +class izstream +{ + public: + izstream() : m_fp(0) {} + izstream(FILE* fp) : m_fp(0) { open(fp); } + izstream(const char* name) : m_fp(0) { open(name); } + ~izstream() { close(); } + + /* Opens a gzip (.gz) file for reading. + * open() can be used to read a file which is not in gzip format; + * in this case read() will directly read from the file without + * decompression. errno can be checked to distinguish two error + * cases (if errno is zero, the zlib error is Z_MEM_ERROR). + */ + void open(const char* name) { + if (m_fp) close(); + m_fp = ::gzopen(name, "rb"); + } + + void open(FILE* fp) { + SET_BINARY_MODE(fp); + if (m_fp) close(); + m_fp = ::gzdopen(fileno(fp), "rb"); + } + + /* Flushes all pending input if necessary, closes the compressed file + * and deallocates all the (de)compression state. The return value is + * the zlib error number (see function error() below). + */ + int close() { + int r = ::gzclose(m_fp); + m_fp = 0; return r; + } + + /* Binary read the given number of bytes from the compressed file. + */ + int read(void* buf, size_t len) { + return ::gzread(m_fp, buf, len); + } + + /* Returns the error message for the last error which occurred on the + * given compressed file. errnum is set to zlib error number. 
If an + * error occurred in the file system and not in the compression library, + * errnum is set to Z_ERRNO and the application may consult errno + * to get the exact error code. + */ + const char* error(int* errnum) { + return ::gzerror(m_fp, errnum); + } + + gzFile fp() { return m_fp; } + + private: + gzFile m_fp; +}; + +/* + * Binary read the given (array of) object(s) from the compressed file. + * If the input file was not in gzip format, read() copies the objects number + * of bytes into the buffer. + * returns the number of uncompressed bytes actually read + * (0 for end of file, -1 for error). + */ +template +inline int read(izstream& zs, T* x, Items items) { + return ::gzread(zs.fp(), x, items*sizeof(T)); +} + +/* + * Binary input with the '>' operator. + */ +template +inline izstream& operator>(izstream& zs, T& x) { + ::gzread(zs.fp(), &x, sizeof(T)); + return zs; +} + + +inline zstringlen::zstringlen(izstream& zs) { + zs > val.byte; + if (val.byte == 255) zs > val.word; + else val.word = val.byte; +} + +/* + * Read length of string + the string with the '>' operator. + */ +inline izstream& operator>(izstream& zs, char* x) { + zstringlen len(zs); + ::gzread(zs.fp(), x, len.value()); + x[len.value()] = '\0'; + return zs; +} + +inline char* read_string(izstream& zs) { + zstringlen len(zs); + char* x = new char[len.value()+1]; + ::gzread(zs.fp(), x, len.value()); + x[len.value()] = '\0'; + return x; +} + +// ----------------------------- ozstream ----------------------------- + +class ozstream +{ + public: + ozstream() : m_fp(0), m_os(0) { + } + ozstream(FILE* fp, int level = Z_DEFAULT_COMPRESSION) + : m_fp(0), m_os(0) { + open(fp, level); + } + ozstream(const char* name, int level = Z_DEFAULT_COMPRESSION) + : m_fp(0), m_os(0) { + open(name, level); + } + ~ozstream() { + close(); + } + + /* Opens a gzip (.gz) file for writing. 
+ * The compression level parameter should be in 0..9 + * errno can be checked to distinguish two error cases + * (if errno is zero, the zlib error is Z_MEM_ERROR). + */ + void open(const char* name, int level = Z_DEFAULT_COMPRESSION) { + char mode[4] = "wb\0"; + if (level != Z_DEFAULT_COMPRESSION) mode[2] = '0'+level; + if (m_fp) close(); + m_fp = ::gzopen(name, mode); + } + + /* open from a FILE pointer. + */ + void open(FILE* fp, int level = Z_DEFAULT_COMPRESSION) { + SET_BINARY_MODE(fp); + char mode[4] = "wb\0"; + if (level != Z_DEFAULT_COMPRESSION) mode[2] = '0'+level; + if (m_fp) close(); + m_fp = ::gzdopen(fileno(fp), mode); + } + + /* Flushes all pending output if necessary, closes the compressed file + * and deallocates all the (de)compression state. The return value is + * the zlib error number (see function error() below). + */ + int close() { + if (m_os) { + ::gzwrite(m_fp, m_os->str(), m_os->pcount()); + delete[] m_os->str(); delete m_os; m_os = 0; + } + int r = ::gzclose(m_fp); m_fp = 0; return r; + } + + /* Binary write the given number of bytes into the compressed file. + */ + int write(const void* buf, size_t len) { + return ::gzwrite(m_fp, (voidp) buf, len); + } + + /* Flushes all pending output into the compressed file. The parameter + * _flush is as in the deflate() function. The return value is the zlib + * error number (see function gzerror below). flush() returns Z_OK if + * the flush_ parameter is Z_FINISH and all output could be flushed. + * flush() should be called only when strictly necessary because it can + * degrade compression. + */ + int flush(int _flush) { + os_flush(); + return ::gzflush(m_fp, _flush); + } + + /* Returns the error message for the last error which occurred on the + * given compressed file. errnum is set to zlib error number. If an + * error occurred in the file system and not in the compression library, + * errnum is set to Z_ERRNO and the application may consult errno + * to get the exact error code. 
+ */ + const char* error(int* errnum) { + return ::gzerror(m_fp, errnum); + } + + gzFile fp() { return m_fp; } + + ostream& os() { + if (m_os == 0) m_os = new ostrstream; + return *m_os; + } + + void os_flush() { + if (m_os && m_os->pcount()>0) { + ostrstream* oss = new ostrstream; + oss->fill(m_os->fill()); + oss->flags(m_os->flags()); + oss->precision(m_os->precision()); + oss->width(m_os->width()); + ::gzwrite(m_fp, m_os->str(), m_os->pcount()); + delete[] m_os->str(); delete m_os; m_os = oss; + } + } + + private: + gzFile m_fp; + ostrstream* m_os; +}; + +/* + * Binary write the given (array of) object(s) into the compressed file. + * returns the number of uncompressed bytes actually written + * (0 in case of error). + */ +template +inline int write(ozstream& zs, const T* x, Items items) { + return ::gzwrite(zs.fp(), (voidp) x, items*sizeof(T)); +} + +/* + * Binary output with the '<' operator. + */ +template +inline ozstream& operator<(ozstream& zs, const T& x) { + ::gzwrite(zs.fp(), (voidp) &x, sizeof(T)); + return zs; +} + +inline zstringlen::zstringlen(ozstream& zs, const char* x) { + val.byte = 255; val.word = ::strlen(x); + if (val.word < 255) zs < (val.byte = val.word); + else zs < val; +} + +/* + * Write length of string + the string with the '<' operator. 
+ */ +inline ozstream& operator<(ozstream& zs, const char* x) { + zstringlen len(zs, x); + ::gzwrite(zs.fp(), (voidp) x, len.value()); + return zs; +} + +#ifdef _MSC_VER +inline ozstream& operator<(ozstream& zs, char* const& x) { + return zs < (const char*) x; +} +#endif + +/* + * Ascii write with the << operator; + */ +template +inline ostream& operator<<(ozstream& zs, const T& x) { + zs.os_flush(); + return zs.os() << x; +} + +#endif Added: external/zlib/contrib/iostream2/zstream_test.cpp ============================================================================== --- (empty file) +++ external/zlib/contrib/iostream2/zstream_test.cpp Tue Jan 3 07:42:59 2006 @@ -0,0 +1,25 @@ +#include "zstream.h" +#include +#include +#include + +void main() { + char h[256] = "Hello"; + char* g = "Goodbye"; + ozstream out("temp.gz"); + out < "This works well" < h < g; + out.close(); + + izstream in("temp.gz"); // read it back + char *x = read_string(in), *y = new char[256], z[256]; + in > y > z; + in.close(); + cout << x << endl << y << endl << z << endl; + + out.open("temp.gz"); // try ascii output; zcat temp.gz to see the results + out << setw(50) << setfill('#') << setprecision(20) << x << endl << y << endl << z << endl; + out << z << endl << y << endl << x << endl; + out << 1.1234567890123456789 << endl; + + delete[] x; delete[] y; +} Added: external/zlib/contrib/iostream3/README ============================================================================== --- (empty file) +++ external/zlib/contrib/iostream3/README Tue Jan 3 07:42:59 2006 @@ -0,0 +1,35 @@ +These classes provide a C++ stream interface to the zlib library. It allows you +to do things like: + + gzofstream outf("blah.gz"); + outf << "These go into the gzip file " << 123 << endl; + +It does this by deriving a specialized stream buffer for gzipped files, which is +the way Stroustrup would have done it. 
:-> + +The gzifstream and gzofstream classes were originally written by Kevin Ruland +and made available in the zlib contrib/iostream directory. The older version still +compiles under gcc 2.xx, but not under gcc 3.xx, which sparked the development of +this version. + +The new classes are as standard-compliant as possible, closely following the +approach of the standard library's fstream classes. It compiles under gcc versions +3.2 and 3.3, but not under gcc 2.xx. This is mainly due to changes in the standard +library naming scheme. The new version of gzifstream/gzofstream/gzfilebuf differs +from the previous one in the following respects: +- added showmanyc +- added setbuf, with support for unbuffered output via setbuf(0,0) +- a few bug fixes of stream behavior +- gzipped output file opened with default compression level instead of maximum level +- setcompressionlevel()/strategy() members replaced by single setcompression() + +The code is provided "as is", with the permission to use, copy, modify, distribute +and sell it for any purpose without fee. + +Ludwig Schwardt + + +DSP Lab +Electrical & Electronic Engineering Department +University of Stellenbosch +South Africa Added: external/zlib/contrib/iostream3/TODO ============================================================================== --- (empty file) +++ external/zlib/contrib/iostream3/TODO Tue Jan 3 07:42:59 2006 @@ -0,0 +1,17 @@ +Possible upgrades to gzfilebuf: + +- The ability to do putback (e.g. putbackfail) + +- The ability to seek (zlib supports this, but could be slow/tricky) + +- Simultaneous read/write access (does it make sense?) + +- Support for ios_base::ate open mode + +- Locale support? + +- Check public interface to see which calls give problems + (due to dependence on library internals) + +- Override operator<<(ostream&, gzfilebuf*) to allow direct copying + of stream buffer to stream ( i.e. 
os << is.rdbuf(); ) Added: external/zlib/contrib/iostream3/test.cc ============================================================================== --- (empty file) +++ external/zlib/contrib/iostream3/test.cc Tue Jan 3 07:42:59 2006 @@ -0,0 +1,50 @@ +/* + * Test program for gzifstream and gzofstream + * + * by Ludwig Schwardt + * original version by Kevin Ruland + */ + +#include "zfstream.h" +#include // for cout + +int main() { + + gzofstream outf; + gzifstream inf; + char buf[80]; + + outf.open("test1.txt.gz"); + outf << "The quick brown fox sidestepped the lazy canine\n" + << 1.3 << "\nPlan " << 9 << std::endl; + outf.close(); + std::cout << "Wrote the following message to 'test1.txt.gz' (check with zcat or zless):\n" + << "The quick brown fox sidestepped the lazy canine\n" + << 1.3 << "\nPlan " << 9 << std::endl; + + std::cout << "\nReading 'test1.txt.gz' (buffered) produces:\n"; + inf.open("test1.txt.gz"); + while (inf.getline(buf,80,'\n')) { + std::cout << buf << "\t(" << inf.rdbuf()->in_avail() << " chars left in buffer)\n"; + } + inf.close(); + + outf.rdbuf()->pubsetbuf(0,0); + outf.open("test2.txt.gz"); + outf << setcompression(Z_NO_COMPRESSION) + << "The quick brown fox sidestepped the lazy canine\n" + << 1.3 << "\nPlan " << 9 << std::endl; + outf.close(); + std::cout << "\nWrote the same message to 'test2.txt.gz' in uncompressed form"; + + std::cout << "\nReading 'test2.txt.gz' (unbuffered) produces:\n"; + inf.rdbuf()->pubsetbuf(0,0); + inf.open("test2.txt.gz"); + while (inf.getline(buf,80,'\n')) { + std::cout << buf << "\t(" << inf.rdbuf()->in_avail() << " chars left in buffer)\n"; + } + inf.close(); + + return 0; + +} Added: external/zlib/contrib/iostream3/zfstream.cc ============================================================================== --- (empty file) +++ external/zlib/contrib/iostream3/zfstream.cc Tue Jan 3 07:42:59 2006 @@ -0,0 +1,479 @@ +/* + * A C++ I/O streams interface to the zlib gz* functions + * + * by Ludwig Schwardt + * original 
version by Kevin Ruland + * + * This version is standard-compliant and compatible with gcc 3.x. + */ + +#include "zfstream.h" +#include // for strcpy, strcat, strlen (mode strings) +#include // for BUFSIZ + +// Internal buffer sizes (default and "unbuffered" versions) +#define BIGBUFSIZE BUFSIZ +#define SMALLBUFSIZE 1 + +/*****************************************************************************/ + +// Default constructor +gzfilebuf::gzfilebuf() +: file(NULL), io_mode(std::ios_base::openmode(0)), own_fd(false), + buffer(NULL), buffer_size(BIGBUFSIZE), own_buffer(true) +{ + // No buffers to start with + this->disable_buffer(); +} + +// Destructor +gzfilebuf::~gzfilebuf() +{ + // Sync output buffer and close only if responsible for file + // (i.e. attached streams should be left open at this stage) + this->sync(); + if (own_fd) + this->close(); + // Make sure internal buffer is deallocated + this->disable_buffer(); +} + +// Set compression level and strategy +int +gzfilebuf::setcompression(int comp_level, + int comp_strategy) +{ + return gzsetparams(file, comp_level, comp_strategy); +} + +// Open gzipped file +gzfilebuf* +gzfilebuf::open(const char *name, + std::ios_base::openmode mode) +{ + // Fail if file already open + if (this->is_open()) + return NULL; + // Don't support simultaneous read/write access (yet) + if ((mode & std::ios_base::in) && (mode & std::ios_base::out)) + return NULL; + + // Build mode string for gzopen and check it [27.8.1.3.2] + char char_mode[6] = "\0\0\0\0\0"; + if (!this->open_mode(mode, char_mode)) + return NULL; + + // Attempt to open file + if ((file = gzopen(name, char_mode)) == NULL) + return NULL; + + // On success, allocate internal buffer and set flags + this->enable_buffer(); + io_mode = mode; + own_fd = true; + return this; +} + +// Attach to gzipped file +gzfilebuf* +gzfilebuf::attach(int fd, + std::ios_base::openmode mode) +{ + // Fail if file already open + if (this->is_open()) + return NULL; + // Don't support simultaneous 
read/write access (yet) + if ((mode & std::ios_base::in) && (mode & std::ios_base::out)) + return NULL; + + // Build mode string for gzdopen and check it [27.8.1.3.2] + char char_mode[6] = "\0\0\0\0\0"; + if (!this->open_mode(mode, char_mode)) + return NULL; + + // Attempt to attach to file + if ((file = gzdopen(fd, char_mode)) == NULL) + return NULL; + + // On success, allocate internal buffer and set flags + this->enable_buffer(); + io_mode = mode; + own_fd = false; + return this; +} + +// Close gzipped file +gzfilebuf* +gzfilebuf::close() +{ + // Fail immediately if no file is open + if (!this->is_open()) + return NULL; + // Assume success + gzfilebuf* retval = this; + // Attempt to sync and close gzipped file + if (this->sync() == -1) + retval = NULL; + if (gzclose(file) < 0) + retval = NULL; + // File is now gone anyway (postcondition [27.8.1.3.8]) + file = NULL; + own_fd = false; + // Destroy internal buffer if it exists + this->disable_buffer(); + return retval; +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +// Convert int open mode to mode string +bool +gzfilebuf::open_mode(std::ios_base::openmode mode, + char* c_mode) const +{ + bool testb = mode & std::ios_base::binary; + bool testi = mode & std::ios_base::in; + bool testo = mode & std::ios_base::out; + bool testt = mode & std::ios_base::trunc; + bool testa = mode & std::ios_base::app; + + // Check for valid flag combinations - see [27.8.1.3.2] (Table 92) + // Original zfstream hardcoded the compression level to maximum here... 
+ // Double the time for less than 1% size improvement seems + // excessive though - keeping it at the default level + // To change back, just append "9" to the next three mode strings + if (!testi && testo && !testt && !testa) + strcpy(c_mode, "w"); + if (!testi && testo && !testt && testa) + strcpy(c_mode, "a"); + if (!testi && testo && testt && !testa) + strcpy(c_mode, "w"); + if (testi && !testo && !testt && !testa) + strcpy(c_mode, "r"); + // No read/write mode yet +// if (testi && testo && !testt && !testa) +// strcpy(c_mode, "r+"); +// if (testi && testo && testt && !testa) +// strcpy(c_mode, "w+"); + + // Mode string should be empty for invalid combination of flags + if (strlen(c_mode) == 0) + return false; + if (testb) + strcat(c_mode, "b"); + return true; +} + +// Determine number of characters in internal get buffer +std::streamsize +gzfilebuf::showmanyc() +{ + // Calls to underflow will fail if file not opened for reading + if (!this->is_open() || !(io_mode & std::ios_base::in)) + return -1; + // Make sure get area is in use + if (this->gptr() && (this->gptr() < this->egptr())) + return std::streamsize(this->egptr() - this->gptr()); + else + return 0; +} + +// Fill get area from gzipped file +gzfilebuf::int_type +gzfilebuf::underflow() +{ + // If something is left in the get area by chance, return it + // (this shouldn't normally happen, as underflow is only supposed + // to be called when gptr >= egptr, but it serves as error check) + if (this->gptr() && (this->gptr() < this->egptr())) + return traits_type::to_int_type(*(this->gptr())); + + // If the file hasn't been opened for reading, produce error + if (!this->is_open() || !(io_mode & std::ios_base::in)) + return traits_type::eof(); + + // Attempt to fill internal buffer from gzipped file + // (buffer must be guaranteed to exist...) 
+ int bytes_read = gzread(file, buffer, buffer_size); + // Indicates error or EOF + if (bytes_read <= 0) + { + // Reset get area + this->setg(buffer, buffer, buffer); + return traits_type::eof(); + } + // Make all bytes read from file available as get area + this->setg(buffer, buffer, buffer + bytes_read); + + // Return next character in get area + return traits_type::to_int_type(*(this->gptr())); +} + +// Write put area to gzipped file +gzfilebuf::int_type +gzfilebuf::overflow(int_type c) +{ + // Determine whether put area is in use + if (this->pbase()) + { + // Double-check pointer range + if (this->pptr() > this->epptr() || this->pptr() < this->pbase()) + return traits_type::eof(); + // Add extra character to buffer if not EOF + if (!traits_type::eq_int_type(c, traits_type::eof())) + { + *(this->pptr()) = traits_type::to_char_type(c); + this->pbump(1); + } + // Number of characters to write to file + int bytes_to_write = this->pptr() - this->pbase(); + // Overflow doesn't fail if nothing is to be written + if (bytes_to_write > 0) + { + // If the file hasn't been opened for writing, produce error + if (!this->is_open() || !(io_mode & std::ios_base::out)) + return traits_type::eof(); + // If gzipped file won't accept all bytes written to it, fail + if (gzwrite(file, this->pbase(), bytes_to_write) != bytes_to_write) + return traits_type::eof(); + // Reset next pointer to point to pbase on success + this->pbump(-bytes_to_write); + } + } + // Write extra character to file if not EOF + else if (!traits_type::eq_int_type(c, traits_type::eof())) + { + // If the file hasn't been opened for writing, produce error + if (!this->is_open() || !(io_mode & std::ios_base::out)) + return traits_type::eof(); + // Impromptu char buffer (allows "unbuffered" output) + char_type last_char = traits_type::to_char_type(c); + // If gzipped file won't accept this character, fail + if (gzwrite(file, &last_char, 1) != 1) + return traits_type::eof(); + } + + // If you got here, you have 
succeeded (even if c was EOF) + // The return value should therefore be non-EOF + if (traits_type::eq_int_type(c, traits_type::eof())) + return traits_type::not_eof(c); + else + return c; +} + +// Assign new buffer +std::streambuf* +gzfilebuf::setbuf(char_type* p, + std::streamsize n) +{ + // First make sure stuff is sync'ed, for safety + if (this->sync() == -1) + return NULL; + // If buffering is turned off on purpose via setbuf(0,0), still allocate one... + // "Unbuffered" only really refers to put [27.8.1.4.10], while get needs at + // least a buffer of size 1 (very inefficient though, therefore make it bigger?) + // This follows from [27.5.2.4.3]/12 (gptr needs to point at something, it seems) + if (!p || !n) + { + // Replace existing buffer (if any) with small internal buffer + this->disable_buffer(); + buffer = NULL; + buffer_size = 0; + own_buffer = true; + this->enable_buffer(); + } + else + { + // Replace existing buffer (if any) with external buffer + this->disable_buffer(); + buffer = p; + buffer_size = n; + own_buffer = false; + this->enable_buffer(); + } + return this; +} + +// Write put area to gzipped file (i.e. ensures that put area is empty) +int +gzfilebuf::sync() +{ + return traits_type::eq_int_type(this->overflow(), traits_type::eof()) ? -1 : 0; +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +// Allocate internal buffer +void +gzfilebuf::enable_buffer() +{ + // If internal buffer required, allocate one + if (own_buffer && !buffer) + { + // Check for buffered vs. "unbuffered" + if (buffer_size > 0) + { + // Allocate internal buffer + buffer = new char_type[buffer_size]; + // Get area starts empty and will be expanded by underflow as need arises + this->setg(buffer, buffer, buffer); + // Setup entire internal buffer as put area. + // The one-past-end pointer actually points to the last element of the buffer, + // so that overflow(c) can safely add the extra character c to the sequence. 
+ // These pointers remain in place for the duration of the buffer + this->setp(buffer, buffer + buffer_size - 1); + } + else + { + // Even in "unbuffered" case, (small?) get buffer is still required + buffer_size = SMALLBUFSIZE; + buffer = new char_type[buffer_size]; + this->setg(buffer, buffer, buffer); + // "Unbuffered" means no put buffer + this->setp(0, 0); + } + } + else + { + // If buffer already allocated, reset buffer pointers just to make sure no + // stale chars are lying around + this->setg(buffer, buffer, buffer); + this->setp(buffer, buffer + buffer_size - 1); + } +} + +// Destroy internal buffer +void +gzfilebuf::disable_buffer() +{ + // If internal buffer exists, deallocate it + if (own_buffer && buffer) + { + // Preserve unbuffered status by zeroing size + if (!this->pbase()) + buffer_size = 0; + delete[] buffer; + buffer = NULL; + this->setg(0, 0, 0); + this->setp(0, 0); + } + else + { + // Reset buffer pointers to initial state if external buffer exists + this->setg(buffer, buffer, buffer); + if (buffer) + this->setp(buffer, buffer + buffer_size - 1); + else + this->setp(0, 0); + } +} + +/*****************************************************************************/ + +// Default constructor initializes stream buffer +gzifstream::gzifstream() +: std::istream(NULL), sb() +{ this->init(&sb); } + +// Initialize stream buffer and open file +gzifstream::gzifstream(const char* name, + std::ios_base::openmode mode) +: std::istream(NULL), sb() +{ + this->init(&sb); + this->open(name, mode); +} + +// Initialize stream buffer and attach to file +gzifstream::gzifstream(int fd, + std::ios_base::openmode mode) +: std::istream(NULL), sb() +{ + this->init(&sb); + this->attach(fd, mode); +} + +// Open file and go into fail() state if unsuccessful +void +gzifstream::open(const char* name, + std::ios_base::openmode mode) +{ + if (!sb.open(name, mode | std::ios_base::in)) + this->setstate(std::ios_base::failbit); + else + this->clear(); +} + +// Attach to file and 
go into fail() state if unsuccessful +void +gzifstream::attach(int fd, + std::ios_base::openmode mode) +{ + if (!sb.attach(fd, mode | std::ios_base::in)) + this->setstate(std::ios_base::failbit); + else + this->clear(); +} + +// Close file +void +gzifstream::close() +{ + if (!sb.close()) + this->setstate(std::ios_base::failbit); +} + +/*****************************************************************************/ + +// Default constructor initializes stream buffer +gzofstream::gzofstream() +: std::ostream(NULL), sb() +{ this->init(&sb); } + +// Initialize stream buffer and open file +gzofstream::gzofstream(const char* name, + std::ios_base::openmode mode) +: std::ostream(NULL), sb() +{ + this->init(&sb); + this->open(name, mode); +} + +// Initialize stream buffer and attach to file +gzofstream::gzofstream(int fd, + std::ios_base::openmode mode) +: std::ostream(NULL), sb() +{ + this->init(&sb); + this->attach(fd, mode); +} + +// Open file and go into fail() state if unsuccessful +void +gzofstream::open(const char* name, + std::ios_base::openmode mode) +{ + if (!sb.open(name, mode | std::ios_base::out)) + this->setstate(std::ios_base::failbit); + else + this->clear(); +} + +// Attach to file and go into fail() state if unsuccessful +void +gzofstream::attach(int fd, + std::ios_base::openmode mode) +{ + if (!sb.attach(fd, mode | std::ios_base::out)) + this->setstate(std::ios_base::failbit); + else + this->clear(); +} + +// Close file +void +gzofstream::close() +{ + if (!sb.close()) + this->setstate(std::ios_base::failbit); +} Added: external/zlib/contrib/iostream3/zfstream.h ============================================================================== --- (empty file) +++ external/zlib/contrib/iostream3/zfstream.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,466 @@ +/* + * A C++ I/O streams interface to the zlib gz* functions + * + * by Ludwig Schwardt + * original version by Kevin Ruland + * + * This version is standard-compliant and compatible with gcc 3.x. 
+ */ + +#ifndef ZFSTREAM_H +#define ZFSTREAM_H + +#include // not iostream, since we don't need cin/cout +#include +#include "zlib.h" + +/*****************************************************************************/ + +/** + * @brief Gzipped file stream buffer class. + * + * This class implements basic_filebuf for gzipped files. It doesn't yet support + * seeking (allowed by zlib but slow/limited), putback and read/write access + * (tricky). Otherwise, it attempts to be a drop-in replacement for the standard + * file streambuf. +*/ +class gzfilebuf : public std::streambuf +{ +public: + // Default constructor. + gzfilebuf(); + + // Destructor. + virtual + ~gzfilebuf(); + + /** + * @brief Set compression level and strategy on the fly. + * @param comp_level Compression level (see zlib.h for allowed values) + * @param comp_strategy Compression strategy (see zlib.h for allowed values) + * @return Z_OK on success, Z_STREAM_ERROR otherwise. + * + * Unfortunately, these parameters cannot be modified separately, as the + * previous zfstream version assumed. Since the strategy is seldom changed, + * it can default and setcompression(level) then becomes like the old + * setcompressionlevel(level). + */ + int + setcompression(int comp_level, + int comp_strategy = Z_DEFAULT_STRATEGY); + + /** + * @brief Check if file is open. + * @return True if file is open. + */ + bool + is_open() const { return (file != NULL); } + + /** + * @brief Open gzipped file. + * @param name File name. + * @param mode Open mode flags. + * @return @c this on success, NULL on failure. + */ + gzfilebuf* + open(const char* name, + std::ios_base::openmode mode); + + /** + * @brief Attach to already open gzipped file. + * @param fd File descriptor. + * @param mode Open mode flags. + * @return @c this on success, NULL on failure. + */ + gzfilebuf* + attach(int fd, + std::ios_base::openmode mode); + + /** + * @brief Close gzipped file. + * @return @c this on success, NULL on failure. 
+ */ + gzfilebuf* + close(); + +protected: + /** + * @brief Convert ios open mode int to mode string used by zlib. + * @return True if valid mode flag combination. + */ + bool + open_mode(std::ios_base::openmode mode, + char* c_mode) const; + + /** + * @brief Number of characters available in stream buffer. + * @return Number of characters. + * + * This indicates number of characters in get area of stream buffer. + * These characters can be read without accessing the gzipped file. + */ + virtual std::streamsize + showmanyc(); + + /** + * @brief Fill get area from gzipped file. + * @return First character in get area on success, EOF on error. + * + * This actually reads characters from gzipped file to stream + * buffer. Always buffered. + */ + virtual int_type + underflow(); + + /** + * @brief Write put area to gzipped file. + * @param c Extra character to add to buffer contents. + * @return Non-EOF on success, EOF on error. + * + * This actually writes characters in stream buffer to + * gzipped file. With unbuffered output this is done one + * character at a time. + */ + virtual int_type + overflow(int_type c = traits_type::eof()); + + /** + * @brief Installs external stream buffer. + * @param p Pointer to char buffer. + * @param n Size of external buffer. + * @return @c this on success, NULL on failure. + * + * Call setbuf(0,0) to enable unbuffered output. + */ + virtual std::streambuf* + setbuf(char_type* p, + std::streamsize n); + + /** + * @brief Flush stream buffer to file. + * @return 0 on success, -1 on error. + * + * This calls underflow(EOF) to do the job. 
+ */ + virtual int + sync(); + +// +// Some future enhancements +// +// virtual int_type uflow(); +// virtual int_type pbackfail(int_type c = traits_type::eof()); +// virtual pos_type +// seekoff(off_type off, +// std::ios_base::seekdir way, +// std::ios_base::openmode mode = std::ios_base::in|std::ios_base::out); +// virtual pos_type +// seekpos(pos_type sp, +// std::ios_base::openmode mode = std::ios_base::in|std::ios_base::out); + +private: + /** + * @brief Allocate internal buffer. + * + * This function is safe to call multiple times. It will ensure + * that a proper internal buffer exists if it is required. If the + * buffer already exists or is external, the buffer pointers will be + * reset to their original state. + */ + void + enable_buffer(); + + /** + * @brief Destroy internal buffer. + * + * This function is safe to call multiple times. It will ensure + * that the internal buffer is deallocated if it exists. In any + * case, it will also reset the buffer pointers. + */ + void + disable_buffer(); + + /** + * Underlying file pointer. + */ + gzFile file; + + /** + * Mode in which file was opened. + */ + std::ios_base::openmode io_mode; + + /** + * @brief True if this object owns file descriptor. + * + * This makes the class responsible for closing the file + * upon destruction. + */ + bool own_fd; + + /** + * @brief Stream buffer. + * + * For simplicity this remains allocated on the free store for the + * entire life span of the gzfilebuf object, unless replaced by setbuf. + */ + char_type* buffer; + + /** + * @brief Stream buffer size. + * + * Defaults to system default buffer size (typically 8192 bytes). + * Modified by setbuf. + */ + std::streamsize buffer_size; + + /** + * @brief True if this object owns stream buffer. + * + * This makes the class responsible for deleting the buffer + * upon destruction. 
+ */ + bool own_buffer; +}; + +/*****************************************************************************/ + +/** + * @brief Gzipped file input stream class. + * + * This class implements ifstream for gzipped files. Seeking and putback + * is not supported yet. +*/ +class gzifstream : public std::istream +{ +public: + // Default constructor + gzifstream(); + + /** + * @brief Construct stream on gzipped file to be opened. + * @param name File name. + * @param mode Open mode flags (forced to contain ios::in). + */ + explicit + gzifstream(const char* name, + std::ios_base::openmode mode = std::ios_base::in); + + /** + * @brief Construct stream on already open gzipped file. + * @param fd File descriptor. + * @param mode Open mode flags (forced to contain ios::in). + */ + explicit + gzifstream(int fd, + std::ios_base::openmode mode = std::ios_base::in); + + /** + * Obtain underlying stream buffer. + */ + gzfilebuf* + rdbuf() const + { return const_cast(&sb); } + + /** + * @brief Check if file is open. + * @return True if file is open. + */ + bool + is_open() { return sb.is_open(); } + + /** + * @brief Open gzipped file. + * @param name File name. + * @param mode Open mode flags (forced to contain ios::in). + * + * Stream will be in state good() if file opens successfully; + * otherwise in state fail(). This differs from the behavior of + * ifstream, which never sets the state to good() and therefore + * won't allow you to reuse the stream for a second file unless + * you manually clear() the state. The choice is a matter of + * convenience. + */ + void + open(const char* name, + std::ios_base::openmode mode = std::ios_base::in); + + /** + * @brief Attach to already open gzipped file. + * @param fd File descriptor. + * @param mode Open mode flags (forced to contain ios::in). + * + * Stream will be in state good() if attach succeeded; otherwise + * in state fail(). 
+ */ + void + attach(int fd, + std::ios_base::openmode mode = std::ios_base::in); + + /** + * @brief Close gzipped file. + * + * Stream will be in state fail() if close failed. + */ + void + close(); + +private: + /** + * Underlying stream buffer. + */ + gzfilebuf sb; +}; + +/*****************************************************************************/ + +/** + * @brief Gzipped file output stream class. + * + * This class implements ofstream for gzipped files. Seeking and putback + * is not supported yet. +*/ +class gzofstream : public std::ostream +{ +public: + // Default constructor + gzofstream(); + + /** + * @brief Construct stream on gzipped file to be opened. + * @param name File name. + * @param mode Open mode flags (forced to contain ios::out). + */ + explicit + gzofstream(const char* name, + std::ios_base::openmode mode = std::ios_base::out); + + /** + * @brief Construct stream on already open gzipped file. + * @param fd File descriptor. + * @param mode Open mode flags (forced to contain ios::out). + */ + explicit + gzofstream(int fd, + std::ios_base::openmode mode = std::ios_base::out); + + /** + * Obtain underlying stream buffer. + */ + gzfilebuf* + rdbuf() const + { return const_cast(&sb); } + + /** + * @brief Check if file is open. + * @return True if file is open. + */ + bool + is_open() { return sb.is_open(); } + + /** + * @brief Open gzipped file. + * @param name File name. + * @param mode Open mode flags (forced to contain ios::out). + * + * Stream will be in state good() if file opens successfully; + * otherwise in state fail(). This differs from the behavior of + * ofstream, which never sets the state to good() and therefore + * won't allow you to reuse the stream for a second file unless + * you manually clear() the state. The choice is a matter of + * convenience. + */ + void + open(const char* name, + std::ios_base::openmode mode = std::ios_base::out); + + /** + * @brief Attach to already open gzipped file. + * @param fd File descriptor. 
+ * @param mode Open mode flags (forced to contain ios::out). + * + * Stream will be in state good() if attach succeeded; otherwise + * in state fail(). + */ + void + attach(int fd, + std::ios_base::openmode mode = std::ios_base::out); + + /** + * @brief Close gzipped file. + * + * Stream will be in state fail() if close failed. + */ + void + close(); + +private: + /** + * Underlying stream buffer. + */ + gzfilebuf sb; +}; + +/*****************************************************************************/ + +/** + * @brief Gzipped file output stream manipulator class. + * + * This class defines a two-argument manipulator for gzofstream. It is used + * as base for the setcompression(int,int) manipulator. +*/ +template + class gzomanip2 + { + public: + // Allows insertor to peek at internals + template + friend gzofstream& + operator<<(gzofstream&, + const gzomanip2&); + + // Constructor + gzomanip2(gzofstream& (*f)(gzofstream&, T1, T2), + T1 v1, + T2 v2); + private: + // Underlying manipulator function + gzofstream& + (*func)(gzofstream&, T1, T2); + + // Arguments for manipulator function + T1 val1; + T2 val2; + }; + +/*****************************************************************************/ + +// Manipulator function thunks through to stream buffer +inline gzofstream& +setcompression(gzofstream &gzs, int l, int s = Z_DEFAULT_STRATEGY) +{ + (gzs.rdbuf())->setcompression(l, s); + return gzs; +} + +// Manipulator constructor stores arguments +template + inline + gzomanip2::gzomanip2(gzofstream &(*f)(gzofstream &, T1, T2), + T1 v1, + T2 v2) + : func(f), val1(v1), val2(v2) + { } + +// Insertor applies underlying manipulator function to stream +template + inline gzofstream& + operator<<(gzofstream& s, const gzomanip2& m) + { return (*m.func)(s, m.val1, m.val2); } + +// Insert this onto stream to simplify setting of compression level +inline gzomanip2 +setcompression(int l, int s = Z_DEFAULT_STRATEGY) +{ return gzomanip2(&setcompression, l, s); } + +#endif // 
ZFSTREAM_H Added: external/zlib/contrib/masm686/match.asm ============================================================================== --- (empty file) +++ external/zlib/contrib/masm686/match.asm Tue Jan 3 07:42:59 2006 @@ -0,0 +1,413 @@ + +; match.asm -- Pentium-Pro optimized version of longest_match() +; +; Updated for zlib 1.1.3 and converted to MASM 6.1x +; Copyright (C) 2000 Dan Higdon +; and Chuck Walbourn +; Corrections by Cosmin Truta +; +; This is free software; you can redistribute it and/or modify it +; under the terms of the GNU General Public License. + +; Based on match.S +; Written for zlib 1.1.2 +; Copyright (C) 1998 Brian Raiter +; +; Modified by Gilles Vollant (2005) for add gzhead and gzindex + + .686P + .MODEL FLAT + +;=========================================================================== +; EQUATES +;=========================================================================== + +MAX_MATCH EQU 258 +MIN_MATCH EQU 3 +MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) +MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) + +;=========================================================================== +; STRUCTURES +;=========================================================================== + +; This STRUCT assumes a 4-byte alignment + +DEFLATE_STATE STRUCT +ds_strm dd ? +ds_status dd ? +ds_pending_buf dd ? +ds_pending_buf_size dd ? +ds_pending_out dd ? +ds_pending dd ? +ds_wrap dd ? +; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h) +ds_gzhead dd ? +ds_gzindex dd ? +ds_data_type db ? +ds_method db ? + db ? ; padding + db ? ; padding +ds_last_flush dd ? +ds_w_size dd ? ; used +ds_w_bits dd ? +ds_w_mask dd ? ; used +ds_window dd ? ; used +ds_window_size dd ? +ds_prev dd ? ; used +ds_head dd ? +ds_ins_h dd ? +ds_hash_size dd ? +ds_hash_bits dd ? +ds_hash_mask dd ? +ds_hash_shift dd ? +ds_block_start dd ? +ds_match_length dd ? ; used +ds_prev_match dd ? ; used +ds_match_available dd ? +ds_strstart dd ? ; used +ds_match_start dd ? 
; used +ds_lookahead dd ? ; used +ds_prev_length dd ? ; used +ds_max_chain_length dd ? ; used +ds_max_laxy_match dd ? +ds_level dd ? +ds_strategy dd ? +ds_good_match dd ? ; used +ds_nice_match dd ? ; used + +; Don't need anymore of the struct for match +DEFLATE_STATE ENDS + +;=========================================================================== +; CODE +;=========================================================================== +_TEXT SEGMENT + +;--------------------------------------------------------------------------- +; match_init +;--------------------------------------------------------------------------- + ALIGN 4 +PUBLIC _match_init +_match_init PROC + ; no initialization needed + ret +_match_init ENDP + +;--------------------------------------------------------------------------- +; uInt longest_match(deflate_state *deflatestate, IPos curmatch) +;--------------------------------------------------------------------------- + ALIGN 4 + +PUBLIC _longest_match +_longest_match PROC + +; Since this code uses EBP for a scratch register, the stack frame must +; be manually constructed and referenced relative to the ESP register. 
+ +; Stack image +; Variables +chainlenwmask = 0 ; high word: current chain len + ; low word: s->wmask +window = 4 ; local copy of s->window +windowbestlen = 8 ; s->window + bestlen +scanend = 12 ; last two bytes of string +scanstart = 16 ; first two bytes of string +scanalign = 20 ; dword-misalignment of string +nicematch = 24 ; a good enough match size +bestlen = 28 ; size of best match so far +scan = 32 ; ptr to string wanting match +varsize = 36 ; number of bytes (also offset to last saved register) + +; Saved Registers (actually pushed into place) +ebx_save = 36 +edi_save = 40 +esi_save = 44 +ebp_save = 48 + +; Parameters +retaddr = 52 +deflatestate = 56 +curmatch = 60 + +; Save registers that the compiler may be using + push ebp + push edi + push esi + push ebx + +; Allocate local variable space + sub esp,varsize + +; Retrieve the function arguments. ecx will hold cur_match +; throughout the entire function. edx will hold the pointer to the +; deflate_state structure during the function's setup (before +; entering the main loop). + + mov edx, [esp+deflatestate] +ASSUME edx:PTR DEFLATE_STATE + + mov ecx, [esp+curmatch] + +; uInt wmask = s->w_mask; +; unsigned chain_length = s->max_chain_length; +; if (s->prev_length >= s->good_match) { +; chain_length >>= 2; +; } + + mov eax, [edx].ds_prev_length + mov ebx, [edx].ds_good_match + cmp eax, ebx + mov eax, [edx].ds_w_mask + mov ebx, [edx].ds_max_chain_length + jl SHORT LastMatchGood + shr ebx, 2 +LastMatchGood: + +; chainlen is decremented once beforehand so that the function can +; use the sign flag instead of the zero flag for the exit test. +; It is then shifted into the high word, to make room for the wmask +; value, which it will always accompany. 
+ + dec ebx + shl ebx, 16 + or ebx, eax + mov [esp+chainlenwmask], ebx + +; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + mov eax, [edx].ds_nice_match + mov ebx, [edx].ds_lookahead + cmp ebx, eax + jl SHORT LookaheadLess + mov ebx, eax +LookaheadLess: + mov [esp+nicematch], ebx + +;/* register Bytef *scan = s->window + s->strstart; */ + + mov esi, [edx].ds_window + mov [esp+window], esi + mov ebp, [edx].ds_strstart + lea edi, [esi+ebp] + mov [esp+scan],edi + +;/* Determine how many bytes the scan ptr is off from being */ +;/* dword-aligned. */ + + mov eax, edi + neg eax + and eax, 3 + mov [esp+scanalign], eax + +;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ +;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ + + mov eax, [edx].ds_w_size + sub eax, MIN_LOOKAHEAD + sub ebp, eax + jg SHORT LimitPositive + xor ebp, ebp +LimitPositive: + +;/* int best_len = s->prev_length; */ + + mov eax, [edx].ds_prev_length + mov [esp+bestlen], eax + +;/* Store the sum of s->window + best_len in %esi locally, and in %esi. */ + + add esi, eax + mov [esp+windowbestlen], esi + +;/* register ush scan_start = *(ushf*)scan; */ +;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ +;/* Posf *prev = s->prev; */ + + movzx ebx, WORD PTR[edi] + mov [esp+scanstart], ebx + movzx ebx, WORD PTR[eax+edi-1] + mov [esp+scanend], ebx + mov edi, [edx].ds_prev + +;/* Jump into the main loop. */ + + mov edx, [esp+chainlenwmask] + jmp SHORT LoopEntry + +;/* do { +; * match = s->window + cur_match; +; * if (*(ushf*)(match+best_len-1) != scan_end || +; * *(ushf*)match != scan_start) continue; +; * [...] +; * } while ((cur_match = prev[cur_match & wmask]) > limit +; * && --chain_length != 0); +; * +; * Here is the inner loop of the function. The function will spend the +; * majority of its time in this loop, and majority of that time will +; * be spent in the first ten instructions. 
+; * +; * Within this loop: +; * %ebx = scanend +; * %ecx = curmatch +; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) +; * %esi = windowbestlen - i.e., (window + bestlen) +; * %edi = prev +; * %ebp = limit +; */ + + ALIGN 4 +LookupLoop: + and ecx, edx + movzx ecx, WORD PTR[edi+ecx*2] + cmp ecx, ebp + jbe LeaveNow + sub edx, 000010000H + js LeaveNow + +LoopEntry: + movzx eax, WORD PTR[esi+ecx-1] + cmp eax, ebx + jnz SHORT LookupLoop + + mov eax, [esp+window] + movzx eax, WORD PTR[eax+ecx] + cmp eax, [esp+scanstart] + jnz SHORT LookupLoop + +;/* Store the current value of chainlen. */ + + mov [esp+chainlenwmask], edx + +;/* Point %edi to the string under scrutiny, and %esi to the string we */ +;/* are hoping to match it up with. In actuality, %esi and %edi are */ +;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ +;/* initialized to -(MAX_MATCH_8 - scanalign). */ + + mov esi, [esp+window] + mov edi, [esp+scan] + add esi, ecx + mov eax, [esp+scanalign] + mov edx, -MAX_MATCH_8 + lea edi, [edi+eax+MAX_MATCH_8] + lea esi, [esi+eax+MAX_MATCH_8] + +;/* Test the strings for equality, 8 bytes at a time. At the end, +; * adjust %edx so that it is offset to the exact byte that mismatched. +; * +; * We already know at this point that the first three bytes of the +; * strings match each other, and they can be safely passed over before +; * starting the compare loop. So what this code does is skip over 0-3 +; * bytes, as much as necessary in order to dword-align the %edi +; * pointer. (%esi will still be misaligned three times out of four.) +; * +; * It should be confessed that this loop usually does not represent +; * much of the total running time. Replacing it with a more +; * straightforward "rep cmpsb" would not drastically degrade +; * performance. 
+; */ + +LoopCmps: + mov eax, DWORD PTR[esi+edx] + xor eax, DWORD PTR[edi+edx] + jnz SHORT LeaveLoopCmps + + mov eax, DWORD PTR[esi+edx+4] + xor eax, DWORD PTR[edi+edx+4] + jnz SHORT LeaveLoopCmps4 + + add edx, 8 + jnz SHORT LoopCmps + jmp LenMaximum + ALIGN 4 + +LeaveLoopCmps4: + add edx, 4 + +LeaveLoopCmps: + test eax, 00000FFFFH + jnz SHORT LenLower + + add edx, 2 + shr eax, 16 + +LenLower: + sub al, 1 + adc edx, 0 + +;/* Calculate the length of the match. If it is longer than MAX_MATCH, */ +;/* then automatically accept it as the best possible match and leave. */ + + lea eax, [edi+edx] + mov edi, [esp+scan] + sub eax, edi + cmp eax, MAX_MATCH + jge SHORT LenMaximum + +;/* If the length of the match is not longer than the best match we */ +;/* have so far, then forget it and return to the lookup loop. */ + + mov edx, [esp+deflatestate] + mov ebx, [esp+bestlen] + cmp eax, ebx + jg SHORT LongerMatch + mov esi, [esp+windowbestlen] + mov edi, [edx].ds_prev + mov ebx, [esp+scanend] + mov edx, [esp+chainlenwmask] + jmp LookupLoop + ALIGN 4 + +;/* s->match_start = cur_match; */ +;/* best_len = len; */ +;/* if (len >= nice_match) break; */ +;/* scan_end = *(ushf*)(scan+best_len-1); */ + +LongerMatch: + mov ebx, [esp+nicematch] + mov [esp+bestlen], eax + mov [edx].ds_match_start, ecx + cmp eax, ebx + jge SHORT LeaveNow + mov esi, [esp+window] + add esi, eax + mov [esp+windowbestlen], esi + movzx ebx, WORD PTR[edi+eax-1] + mov edi, [edx].ds_prev + mov [esp+scanend], ebx + mov edx, [esp+chainlenwmask] + jmp LookupLoop + ALIGN 4 + +;/* Accept the current string, with the maximum possible length. 
*/ + +LenMaximum: + mov edx, [esp+deflatestate] + mov DWORD PTR[esp+bestlen], MAX_MATCH + mov [edx].ds_match_start, ecx + +;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ +;/* return s->lookahead; */ + +LeaveNow: + mov edx, [esp+deflatestate] + mov ebx, [esp+bestlen] + mov eax, [edx].ds_lookahead + cmp ebx, eax + jg SHORT LookaheadRet + mov eax, ebx +LookaheadRet: + +; Restore the stack and return from whence we came. + + add esp, varsize + pop ebx + pop esi + pop edi + pop ebp + ret + +_longest_match ENDP + +_TEXT ENDS +END Added: external/zlib/contrib/masmx64/bld_ml64.bat ============================================================================== --- (empty file) +++ external/zlib/contrib/masmx64/bld_ml64.bat Tue Jan 3 07:42:59 2006 @@ -0,0 +1,2 @@ +ml64.exe /Flinffasx64 /c /Zi inffasx64.asm +ml64.exe /Flgvmat64 /c /Zi gvmat64.asm Added: external/zlib/contrib/masmx64/gvmat64.asm ============================================================================== --- (empty file) +++ external/zlib/contrib/masmx64/gvmat64.asm Tue Jan 3 07:42:59 2006 @@ -0,0 +1,513 @@ +;uInt longest_match_x64( +; deflate_state *s, +; IPos cur_match); /* current match */ + +; gvmat64.asm -- Asm portion of the optimized longest_match for 32 bits x86 +; Copyright (C) 1995-2005 Jean-loup Gailly, Brian Raiter and Gilles Vollant. +; +; File written by Gilles Vollant, by converting to assembly the longest_match +; from Jean-loup Gailly in deflate.c of zLib and infoZip zip. 
+; +; and by taking inspiration on asm686 with masm, optimised assembly code +; from Brian Raiter, written 1998 +; +; http://www.zlib.net +; http://www.winimage.com/zLibDll +; http://www.muppetlabs.com/~breadbox/software/assembly.html +; +; to compile this file for infozip Zip, I use option: +; ml64.exe /Flgvmat64 /c /Zi /DINFOZIP gvmat64.asm +; +; to compile this file for zLib, I use option: +; ml64.exe /Flgvmat64 /c /Zi gvmat64.asm +; Be carrefull to adapt zlib1222add below to your version of zLib +; (if you use a version of zLib before 1.0.4 or after 1.2.2.2, change +; value of zlib1222add later) +; +; This file compile with Microsoft Macro Assembler (x64) for AMD64 +; +; ml64.exe is given with Visual Studio 2005 and Windows 2003 server DDK +; +; (you can get Windows 2003 server DDK with ml64 and cl for AMD64 from +; http://www.microsoft.com/whdc/devtools/ddk/default.mspx for low price) +; + + +;uInt longest_match(s, cur_match) +; deflate_state *s; +; IPos cur_match; /* current match */ +.code +longest_match PROC + + +;LocalVarsSize equ 88 + LocalVarsSize equ 72 + +; register used : rax,rbx,rcx,rdx,rsi,rdi,r8,r9,r10,r11,r12 +; free register : r14,r15 +; register can be saved : rsp + + chainlenwmask equ rsp + 8 - LocalVarsSize ; high word: current chain len + ; low word: s->wmask +;window equ rsp + xx - LocalVarsSize ; local copy of s->window ; stored in r10 +;windowbestlen equ rsp + xx - LocalVarsSize ; s->window + bestlen , use r10+r11 +;scanstart equ rsp + xx - LocalVarsSize ; first two bytes of string ; stored in r12w +;scanend equ rsp + xx - LocalVarsSize ; last two bytes of string use ebx +;scanalign equ rsp + xx - LocalVarsSize ; dword-misalignment of string r13 +;bestlen equ rsp + xx - LocalVarsSize ; size of best match so far -> r11d +;scan equ rsp + xx - LocalVarsSize ; ptr to string wanting match -> r9 +IFDEF INFOZIP +ELSE + nicematch equ (rsp + 16 - LocalVarsSize) ; a good enough match size +ENDIF + +save_rdi equ rsp + 24 - LocalVarsSize +save_rsi equ 
rsp + 32 - LocalVarsSize +save_rbx equ rsp + 40 - LocalVarsSize +save_rbp equ rsp + 48 - LocalVarsSize +save_r12 equ rsp + 56 - LocalVarsSize +save_r13 equ rsp + 64 - LocalVarsSize +;save_r14 equ rsp + 72 - LocalVarsSize +;save_r15 equ rsp + 80 - LocalVarsSize + + + +; all the +4 offsets are due to the addition of pending_buf_size (in zlib +; in the deflate_state structure since the asm code was first written +; (if you compile with zlib 1.0.4 or older, remove the +4). +; Note : these value are good with a 8 bytes boundary pack structure + + + MAX_MATCH equ 258 + MIN_MATCH equ 3 + MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) + + +;;; Offsets for fields in the deflate_state structure. These numbers +;;; are calculated from the definition of deflate_state, with the +;;; assumption that the compiler will dword-align the fields. (Thus, +;;; changing the definition of deflate_state could easily cause this +;;; program to crash horribly, without so much as a warning at +;;; compile time. Sigh.) + +; all the +zlib1222add offsets are due to the addition of fields +; in zlib in the deflate_state structure since the asm code was first written +; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). +; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). +; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). 
+ + +IFDEF INFOZIP + +_DATA SEGMENT +COMM window_size:DWORD +; WMask ; 7fff +COMM window:BYTE:010040H +COMM prev:WORD:08000H +; MatchLen : unused +; PrevMatch : unused +COMM strstart:DWORD +COMM match_start:DWORD +; Lookahead : ignore +COMM prev_length:DWORD ; PrevLen +COMM max_chain_length:DWORD +COMM good_match:DWORD +COMM nice_match:DWORD +prev_ad equ OFFSET prev +window_ad equ OFFSET window +nicematch equ nice_match +_DATA ENDS +WMask equ 07fffh + +ELSE + + IFNDEF zlib1222add + zlib1222add equ 8 + ENDIF +dsWSize equ 56+zlib1222add+(zlib1222add/2) +dsWMask equ 64+zlib1222add+(zlib1222add/2) +dsWindow equ 72+zlib1222add +dsPrev equ 88+zlib1222add +dsMatchLen equ 128+zlib1222add +dsPrevMatch equ 132+zlib1222add +dsStrStart equ 140+zlib1222add +dsMatchStart equ 144+zlib1222add +dsLookahead equ 148+zlib1222add +dsPrevLen equ 152+zlib1222add +dsMaxChainLen equ 156+zlib1222add +dsGoodMatch equ 172+zlib1222add +dsNiceMatch equ 176+zlib1222add + +window_size equ [ rcx + dsWSize] +WMask equ [ rcx + dsWMask] +window_ad equ [ rcx + dsWindow] +prev_ad equ [ rcx + dsPrev] +strstart equ [ rcx + dsStrStart] +match_start equ [ rcx + dsMatchStart] +Lookahead equ [ rcx + dsLookahead] ; 0ffffffffh on infozip +prev_length equ [ rcx + dsPrevLen] +max_chain_length equ [ rcx + dsMaxChainLen] +good_match equ [ rcx + dsGoodMatch] +nice_match equ [ rcx + dsNiceMatch] +ENDIF + +; parameter 1 in r8(deflate state s), param 2 in rdx (cur match) + +; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and +; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp +; +; All registers must be preserved across the call, except for +; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch. + + + +;;; Save registers that the compiler may be using, and adjust esp to +;;; make room for our stack frame. + + +;;; Retrieve the function arguments. r8d will hold cur_match +;;; throughout the entire function. 
edx will hold the pointer to the +;;; deflate_state structure during the function's setup (before +;;; entering the main loop. + +; parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8 (cur match) + +; this clear high 32 bits of r8, which can be garbage in both r8 and rdx + + mov [save_rdi],rdi + mov [save_rsi],rsi + mov [save_rbx],rbx + mov [save_rbp],rbp +IFDEF INFOZIP + mov r8d,ecx +ELSE + mov r8d,edx +ENDIF + mov [save_r12],r12 + mov [save_r13],r13 +; mov [save_r14],r14 +; mov [save_r15],r15 + + +;;; uInt wmask = s->w_mask; +;;; unsigned chain_length = s->max_chain_length; +;;; if (s->prev_length >= s->good_match) { +;;; chain_length >>= 2; +;;; } + + mov edi, prev_length + mov esi, good_match + mov eax, WMask + mov ebx, max_chain_length + cmp edi, esi + jl LastMatchGood + shr ebx, 2 +LastMatchGood: + +;;; chainlen is decremented once beforehand so that the function can +;;; use the sign flag instead of the zero flag for the exit test. +;;; It is then shifted into the high word, to make room for the wmask +;;; value, which it will always accompany. + + dec ebx + shl ebx, 16 + or ebx, eax + +;;; on zlib only +;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + +IFDEF INFOZIP + mov [chainlenwmask], ebx +; on infozip nice_match = [nice_match] +ELSE + mov eax, nice_match + mov [chainlenwmask], ebx + mov r10d, Lookahead + cmp r10d, eax + cmovnl r10d, eax + mov [nicematch],r10d +ENDIF + +;;; register Bytef *scan = s->window + s->strstart; + mov r10, window_ad + mov ebp, strstart + lea r13, [r10 + rbp] + +;;; Determine how many bytes the scan ptr is off from being +;;; dword-aligned. + + mov r9,r13 + neg r13 + and r13,3 + +;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? 
+;;; s->strstart - (IPos)MAX_DIST(s) : NIL; +IFDEF INFOZIP + mov eax,07efah ; MAX_DIST = (WSIZE-MIN_LOOKAHEAD) (0x8000-(3+8+1)) +ELSE + mov eax, window_size + sub eax, MIN_LOOKAHEAD +ENDIF + xor edi,edi + sub ebp, eax + + mov r11d, prev_length + + cmovng ebp,edi + +;;; int best_len = s->prev_length; + + +;;; Store the sum of s->window + best_len in esi locally, and in esi. + + lea rsi,[r10+r11] + +;;; register ush scan_start = *(ushf*)scan; +;;; register ush scan_end = *(ushf*)(scan+best_len-1); +;;; Posf *prev = s->prev; + + movzx r12d,word ptr [r9] + movzx ebx, word ptr [r9 + r11 - 1] + + mov rdi, prev_ad + +;;; Jump into the main loop. + + mov edx, [chainlenwmask] + + cmp bx,word ptr [rsi + r8 - 1] + jz LookupLoopIsZero + +LookupLoop1: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + jbe LeaveNow + sub edx, 00010000h + js LeaveNow + +LoopEntry1: + cmp bx,word ptr [rsi + r8 - 1] + jz LookupLoopIsZero + +LookupLoop2: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + jbe LeaveNow + sub edx, 00010000h + js LeaveNow + +LoopEntry2: + cmp bx,word ptr [rsi + r8 - 1] + jz LookupLoopIsZero + +LookupLoop4: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + jbe LeaveNow + sub edx, 00010000h + js LeaveNow + +LoopEntry4: + + cmp bx,word ptr [rsi + r8 - 1] + jnz LookupLoop1 + jmp LookupLoopIsZero + + +;;; do { +;;; match = s->window + cur_match; +;;; if (*(ushf*)(match+best_len-1) != scan_end || +;;; *(ushf*)match != scan_start) continue; +;;; [...] +;;; } while ((cur_match = prev[cur_match & wmask]) > limit +;;; && --chain_length != 0); +;;; +;;; Here is the inner loop of the function. The function will spend the +;;; majority of its time in this loop, and majority of that time will +;;; be spent in the first ten instructions. 
+;;; +;;; Within this loop: +;;; ebx = scanend +;;; r8d = curmatch +;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) +;;; esi = windowbestlen - i.e., (window + bestlen) +;;; edi = prev +;;; ebp = limit + +LookupLoop: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + jbe LeaveNow + sub edx, 00010000h + js LeaveNow + +LoopEntry: + + cmp bx,word ptr [rsi + r8 - 1] + jnz LookupLoop1 +LookupLoopIsZero: + cmp r12w, word ptr [r10 + r8] + jnz LookupLoop1 + + +;;; Store the current value of chainlen. + mov [chainlenwmask], edx + +;;; Point edi to the string under scrutiny, and esi to the string we +;;; are hoping to match it up with. In actuality, esi and edi are +;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is +;;; initialized to -(MAX_MATCH_8 - scanalign). + + lea rsi,[r8+r10] + mov rdx, 0fffffffffffffef8h; -(MAX_MATCH_8) + lea rsi, [rsi + r13 + 0108h] ;MAX_MATCH_8] + lea rdi, [r9 + r13 + 0108h] ;MAX_MATCH_8] + + prefetcht1 [rsi+rdx] + prefetcht1 [rdi+rdx] + + +;;; Test the strings for equality, 8 bytes at a time. At the end, +;;; adjust rdx so that it is offset to the exact byte that mismatched. +;;; +;;; We already know at this point that the first three bytes of the +;;; strings match each other, and they can be safely passed over before +;;; starting the compare loop. So what this code does is skip over 0-3 +;;; bytes, as much as necessary in order to dword-align the edi +;;; pointer. (rsi will still be misaligned three times out of four.) +;;; +;;; It should be confessed that this loop usually does not represent +;;; much of the total running time. Replacing it with a more +;;; straightforward "rep cmpsb" would not drastically degrade +;;; performance. 
+ + +LoopCmps: + mov rax, [rsi + rdx] + xor rax, [rdi + rdx] + jnz LeaveLoopCmps + + mov rax, [rsi + rdx + 8] + xor rax, [rdi + rdx + 8] + jnz LeaveLoopCmps8 + + + mov rax, [rsi + rdx + 8+8] + xor rax, [rdi + rdx + 8+8] + jnz LeaveLoopCmps16 + + add rdx,8+8+8 + + jmp short LoopCmps +LeaveLoopCmps16: add rdx,8 +LeaveLoopCmps8: add rdx,8 +LeaveLoopCmps: + + test eax, 0000FFFFh + jnz LenLower + + test eax,0ffffffffh + + jnz LenLower32 + + add rdx,4 + shr rax,32 + or ax,ax + jnz LenLower + +LenLower32: + shr eax,16 + add rdx,2 +LenLower: sub al, 1 + adc rdx, 0 +;;; Calculate the length of the match. If it is longer than MAX_MATCH, +;;; then automatically accept it as the best possible match and leave. + + lea rax, [rdi + rdx] + sub rax, r9 + cmp eax, MAX_MATCH + jge LenMaximum + +;;; If the length of the match is not longer than the best match we +;;; have so far, then forget it and return to the lookup loop. +;/////////////////////////////////// + + cmp eax, r11d + jg LongerMatch + + lea rsi,[r10+r11] + + mov rdi, prev_ad + mov edx, [chainlenwmask] + jmp LookupLoop + +;;; s->match_start = cur_match; +;;; best_len = len; +;;; if (len >= nice_match) break; +;;; scan_end = *(ushf*)(scan+best_len-1); + +LongerMatch: + mov r11d, eax + mov match_start, r8d + cmp eax, [nicematch] + jge LeaveNow + + lea rsi,[r10+rax] + + movzx ebx, word ptr [r9 + rax - 1] + mov rdi, prev_ad + mov edx, [chainlenwmask] + jmp LookupLoop + +;;; Accept the current string, with the maximum possible length. + +LenMaximum: + mov r11d,MAX_MATCH + mov match_start, r8d + +;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; +;;; return s->lookahead; + +LeaveNow: +IFDEF INFOZIP + mov eax,r11d +ELSE + mov eax, Lookahead + cmp r11d, eax + cmovng eax, r11d +ENDIF + +;;; Restore the stack and return from whence we came. 
+ + + mov rsi,[save_rsi] + mov rdi,[save_rdi] + mov rbx,[save_rbx] + mov rbp,[save_rbp] + mov r12,[save_r12] + mov r13,[save_r13] +; mov r14,[save_r14] +; mov r15,[save_r15] + + + ret 0 +; please don't remove this string ! +; Your can freely use gvmat64 in any free or commercial app +; but it is far better don't remove the string in the binary! + db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0 +longest_match ENDP + +match_init PROC + ret 0 +match_init ENDP + + +END Added: external/zlib/contrib/masmx64/gvmat64.obj ============================================================================== Binary file. No diff available. Added: external/zlib/contrib/masmx64/inffas8664.c ============================================================================== --- (empty file) +++ external/zlib/contrib/masmx64/inffas8664.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,186 @@ +/* inffas8664.c is a hand tuned assembler version of inffast.c - fast decoding + * version for AMD64 on Windows using Microsoft C compiler + * + * Copyright (C) 1995-2003 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Copyright (C) 2003 Chris Anderson + * Please use the copyright conditions above. + * + * 2005 - Adaptation to Microsoft C Compiler for AMD64 by Gilles Vollant + * + * inffas8664.c call function inffas8664fnc in inffasx64.asm + * inffasx64.asm is automatically convert from AMD64 portion of inffas86.c + * + * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also + * slightly quicker on x86 systems because, instead of using rep movsb to copy + * data, it uses rep movsw, which moves data in 2-byte chunks instead of single + * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates + * from http://fedora.linux.duke.edu/fc1_x86_64 + * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with + * 1GB ram. 
The 64-bit version is about 4% faster than the 32-bit version, + * when decompressing mozilla-source-1.3.tar.gz. + * + * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from + * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at + * the moment. I have successfully compiled and tested this code with gcc2.96, + * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S + * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX + * enabled. I will attempt to merge the MMX code into this version. Newer + * versions of this and inffast.S can be found at + * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/ + * + */ + +#include +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +/* Mark Adler's comments from inffast.c: */ + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. 
+ + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. + */ + + + + typedef struct inffast_ar { +/* 64 32 x86 x86_64 */ +/* ar offset register */ +/* 0 0 */ void *esp; /* esp save */ +/* 8 4 */ void *ebp; /* ebp save */ +/* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */ +/* 24 12 */ unsigned char FAR *last; /* r9 while in < last */ +/* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */ +/* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */ +/* 48 24 */ unsigned char FAR *end; /* r10 while out < end */ +/* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */ +/* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */ +/* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */ +/* 80 40 */ size_t /*unsigned long */hold; /* edx rdx local strm->hold */ +/* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */ +/* 92 48 */ unsigned wsize; /* window size */ +/* 96 52 */ unsigned write; /* window write index */ +/*100 56 */ unsigned lmask; /* r12 mask for lcode */ +/*104 60 */ unsigned dmask; /* r13 mask for dcode */ +/*108 64 */ unsigned len; /* r14 match length */ +/*112 68 */ unsigned dist; /* r15 match distance */ +/*116 72 */ unsigned status; /* set when state chng*/ + } type_ar; +#ifdef ASMINF + +void inflate_fast(strm, start) +z_streamp strm; +unsigned start; /* inflate()'s starting value for strm->avail_out */ +{ + struct inflate_state FAR *state; + type_ar ar; + void inffas8664fnc(struct inffast_ar * par); + + + +#if (defined( __GNUC__ ) && defined( __amd64__ ) && ! 
defined( __i386 )) || (defined(_MSC_VER) && defined(_M_AMD64)) +#define PAD_AVAIL_IN 6 +#define PAD_AVAIL_OUT 258 +#else +#define PAD_AVAIL_IN 5 +#define PAD_AVAIL_OUT 257 +#endif + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + + ar.in = strm->next_in; + ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN); + ar.out = strm->next_out; + ar.beg = ar.out - (start - strm->avail_out); + ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT); + ar.wsize = state->wsize; + ar.write = state->write; + ar.window = state->window; + ar.hold = state->hold; + ar.bits = state->bits; + ar.lcode = state->lencode; + ar.dcode = state->distcode; + ar.lmask = (1U << state->lenbits) - 1; + ar.dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + + /* align in on 1/2 hold size boundary */ + while (((size_t)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) { + ar.hold += (unsigned long)*ar.in++ << ar.bits; + ar.bits += 8; + } + + inffas8664fnc(&ar); + + if (ar.status > 1) { + if (ar.status == 2) + strm->msg = "invalid literal/length code"; + else if (ar.status == 3) + strm->msg = "invalid distance code"; + else + strm->msg = "invalid distance too far back"; + state->mode = BAD; + } + else if ( ar.status == 1 ) { + state->mode = TYPE; + } + + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + ar.len = ar.bits >> 3; + ar.in -= ar.len; + ar.bits -= ar.len << 3; + ar.hold &= (1U << ar.bits) - 1; + + /* update state and return */ + strm->next_in = ar.in; + strm->next_out = ar.out; + strm->avail_in = (unsigned)(ar.in < ar.last ? + PAD_AVAIL_IN + (ar.last - ar.in) : + PAD_AVAIL_IN - (ar.in - ar.last)); + strm->avail_out = (unsigned)(ar.out < ar.end ? 
+ PAD_AVAIL_OUT + (ar.end - ar.out) : + PAD_AVAIL_OUT - (ar.out - ar.end)); + state->hold = (unsigned long)ar.hold; + state->bits = ar.bits; + return; +} + +#endif Added: external/zlib/contrib/masmx64/inffasx64.asm ============================================================================== --- (empty file) +++ external/zlib/contrib/masmx64/inffasx64.asm Tue Jan 3 07:42:59 2006 @@ -0,0 +1,392 @@ +; inffasx64.asm is a hand tuned assembler version of inffast.c - fast decoding +; version for AMD64 on Windows using Microsoft C compiler +; +; inffasx64.asm is automatically converted from the AMD64 portion of inffas86.c +; inffasx64.asm is called by inffas8664.c, which contains more info. + + +; to compile this file, I use option +; ml64.exe /Flinffasx64 /c /Zi inffasx64.asm +; with Microsoft Macro Assembler (x64) for AMD64 +; +; ml64.exe is given with Visual Studio 2005, Windows 2003 server DDK +; +; (you can get Windows 2003 server DDK with ml64 and cl.exe for AMD64 from +; http://www.microsoft.com/whdc/devtools/ddk/default.mspx for low price) +; + +.code +inffas8664fnc PROC + +; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and +; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp +; +; All registers must be preserved across the call, except for +; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.
+ + + mov [rsp-8],rsi + mov [rsp-16],rdi + mov [rsp-24],r12 + mov [rsp-32],r13 + mov [rsp-40],r14 + mov [rsp-48],r15 + mov [rsp-56],rbx + + mov rax,rcx + + mov [rax+8], rbp ; /* save regs rbp and rsp */ + mov [rax], rsp + + mov rsp, rax ; /* make rsp point to &ar */ + + mov rsi, [rsp+16] ; /* rsi = in */ + mov rdi, [rsp+32] ; /* rdi = out */ + mov r9, [rsp+24] ; /* r9 = last */ + mov r10, [rsp+48] ; /* r10 = end */ + mov rbp, [rsp+64] ; /* rbp = lcode */ + mov r11, [rsp+72] ; /* r11 = dcode */ + mov rdx, [rsp+80] ; /* rdx = hold */ + mov ebx, [rsp+88] ; /* ebx = bits */ + mov r12d, [rsp+100] ; /* r12d = lmask */ + mov r13d, [rsp+104] ; /* r13d = dmask */ + ; /* r14d = len */ + ; /* r15d = dist */ + + + cld + cmp r10, rdi + je L_one_time ; /* if only one decode left */ + cmp r9, rsi + + jne L_do_loop + + +L_one_time: + mov r8, r12 ; /* r8 = lmask */ + cmp bl, 32 + ja L_get_length_code_one_time + + lodsd ; /* eax = *(uint *)in++ */ + mov cl, bl ; /* cl = bits, needs it for shifting */ + add bl, 32 ; /* bits += 32 */ + shl rax, cl + or rdx, rax ; /* hold |= *((uint *)in)++ << bits */ + jmp L_get_length_code_one_time + +ALIGN 4 +L_while_test: + cmp r10, rdi + jbe L_break_loop + cmp r9, rsi + jbe L_break_loop + +L_do_loop: + mov r8, r12 ; /* r8 = lmask */ + cmp bl, 32 + ja L_get_length_code ; /* if (32 < bits) */ + + lodsd ; /* eax = *(uint *)in++ */ + mov cl, bl ; /* cl = bits, needs it for shifting */ + add bl, 32 ; /* bits += 32 */ + shl rax, cl + or rdx, rax ; /* hold |= *((uint *)in)++ << bits */ + +L_get_length_code: + and r8, rdx ; /* r8 &= hold */ + mov eax, [rbp+r8*4] ; /* eax = lcode[hold & lmask] */ + + mov cl, ah ; /* cl = this.bits */ + sub bl, ah ; /* bits -= this.bits */ + shr rdx, cl ; /* hold >>= this.bits */ + + test al, al + jnz L_test_for_length_base ; /* if (op != 0) 45.7% */ + + mov r8, r12 ; /* r8 = lmask */ + shr eax, 16 ; /* output this.val char */ + stosb + +L_get_length_code_one_time: + and r8, rdx ; /* r8 &= hold */ + mov eax, [rbp+r8*4] ; /* 
eax = lcode[hold & lmask] */ + +L_dolen: + mov cl, ah ; /* cl = this.bits */ + sub bl, ah ; /* bits -= this.bits */ + shr rdx, cl ; /* hold >>= this.bits */ + + test al, al + jnz L_test_for_length_base ; /* if (op != 0) 45.7% */ + + shr eax, 16 ; /* output this.val char */ + stosb + jmp L_while_test + +ALIGN 4 +L_test_for_length_base: + mov r14d, eax ; /* len = this */ + shr r14d, 16 ; /* len = this.val */ + mov cl, al + + test al, 16 + jz L_test_for_second_level_length ; /* if ((op & 16) == 0) 8% */ + and cl, 15 ; /* op &= 15 */ + jz L_decode_distance ; /* if (!op) */ + +L_add_bits_to_len: + sub bl, cl + xor eax, eax + inc eax + shl eax, cl + dec eax + and eax, edx ; /* eax &= hold */ + shr rdx, cl + add r14d, eax ; /* len += hold & mask[op] */ + +L_decode_distance: + mov r8, r13 ; /* r8 = dmask */ + cmp bl, 32 + ja L_get_distance_code ; /* if (32 < bits) */ + + lodsd ; /* eax = *(uint *)in++ */ + mov cl, bl ; /* cl = bits, needs it for shifting */ + add bl, 32 ; /* bits += 32 */ + shl rax, cl + or rdx, rax ; /* hold |= *((uint *)in)++ << bits */ + +L_get_distance_code: + and r8, rdx ; /* r8 &= hold */ + mov eax, [r11+r8*4] ; /* eax = dcode[hold & dmask] */ + +L_dodist: + mov r15d, eax ; /* dist = this */ + shr r15d, 16 ; /* dist = this.val */ + mov cl, ah + sub bl, ah ; /* bits -= this.bits */ + shr rdx, cl ; /* hold >>= this.bits */ + mov cl, al ; /* cl = this.op */ + + test al, 16 ; /* if ((op & 16) == 0) */ + jz L_test_for_second_level_dist + and cl, 15 ; /* op &= 15 */ + jz L_check_dist_one + +L_add_bits_to_dist: + sub bl, cl + xor eax, eax + inc eax + shl eax, cl + dec eax ; /* (1 << op) - 1 */ + and eax, edx ; /* eax &= hold */ + shr rdx, cl + add r15d, eax ; /* dist += hold & ((1 << op) - 1) */ + +L_check_window: + mov r8, rsi ; /* save in so from can use it's reg */ + mov rax, rdi + sub rax, [rsp+40] ; /* nbytes = out - beg */ + + cmp eax, r15d + jb L_clip_window ; /* if (dist > nbytes) 4.2% */ + + mov ecx, r14d ; /* ecx = len */ + mov rsi, rdi + sub rsi, 
r15 ; /* from = out - dist */ + + sar ecx, 1 + jnc L_copy_two ; /* if len % 2 == 0 */ + + rep movsw + mov al, [rsi] + mov [rdi], al + inc rdi + + mov rsi, r8 ; /* move in back to %rsi, toss from */ + jmp L_while_test + +L_copy_two: + rep movsw + mov rsi, r8 ; /* move in back to %rsi, toss from */ + jmp L_while_test + +ALIGN 4 +L_check_dist_one: + cmp r15d, 1 ; /* if dist 1, is a memset */ + jne L_check_window + cmp [rsp+40], rdi ; /* if out == beg, outside window */ + je L_check_window + + mov ecx, r14d ; /* ecx = len */ + mov al, [rdi-1] + mov ah, al + + sar ecx, 1 + jnc L_set_two + mov [rdi], al + inc rdi + +L_set_two: + rep stosw + jmp L_while_test + +ALIGN 4 +L_test_for_second_level_length: + test al, 64 + jnz L_test_for_end_of_block ; /* if ((op & 64) != 0) */ + + xor eax, eax + inc eax + shl eax, cl + dec eax + and eax, edx ; /* eax &= hold */ + add eax, r14d ; /* eax += len */ + mov eax, [rbp+rax*4] ; /* eax = lcode[val+(hold&mask[op])]*/ + jmp L_dolen + +ALIGN 4 +L_test_for_second_level_dist: + test al, 64 + jnz L_invalid_distance_code ; /* if ((op & 64) != 0) */ + + xor eax, eax + inc eax + shl eax, cl + dec eax + and eax, edx ; /* eax &= hold */ + add eax, r15d ; /* eax += dist */ + mov eax, [r11+rax*4] ; /* eax = dcode[val+(hold&mask[op])]*/ + jmp L_dodist + +ALIGN 4 +L_clip_window: + mov ecx, eax ; /* ecx = nbytes */ + mov eax, [rsp+92] ; /* eax = wsize, prepare for dist cmp */ + neg ecx ; /* nbytes = -nbytes */ + + cmp eax, r15d + jb L_invalid_distance_too_far ; /* if (dist > wsize) */ + + add ecx, r15d ; /* nbytes = dist - nbytes */ + cmp dword ptr [rsp+96], 0 + jne L_wrap_around_window ; /* if (write != 0) */ + + mov rsi, [rsp+56] ; /* from = window */ + sub eax, ecx ; /* eax -= nbytes */ + add rsi, rax ; /* from += wsize - nbytes */ + + mov eax, r14d ; /* eax = len */ + cmp r14d, ecx + jbe L_do_copy ; /* if (nbytes >= len) */ + + sub eax, ecx ; /* eax -= nbytes */ + rep movsb + mov rsi, rdi + sub rsi, r15 ; /* from = &out[ -dist ] */ + jmp L_do_copy 
+ +ALIGN 4 +L_wrap_around_window: + mov eax, [rsp+96] ; /* eax = write */ + cmp ecx, eax + jbe L_contiguous_in_window ; /* if (write >= nbytes) */ + + mov esi, [rsp+92] ; /* from = wsize */ + add rsi, [rsp+56] ; /* from += window */ + add rsi, rax ; /* from += write */ + sub rsi, rcx ; /* from -= nbytes */ + sub ecx, eax ; /* nbytes -= write */ + + mov eax, r14d ; /* eax = len */ + cmp eax, ecx + jbe L_do_copy ; /* if (nbytes >= len) */ + + sub eax, ecx ; /* len -= nbytes */ + rep movsb + mov rsi, [rsp+56] ; /* from = window */ + mov ecx, [rsp+96] ; /* nbytes = write */ + cmp eax, ecx + jbe L_do_copy ; /* if (nbytes >= len) */ + + sub eax, ecx ; /* len -= nbytes */ + rep movsb + mov rsi, rdi + sub rsi, r15 ; /* from = out - dist */ + jmp L_do_copy + +ALIGN 4 +L_contiguous_in_window: + mov rsi, [rsp+56] ; /* rsi = window */ + add rsi, rax + sub rsi, rcx ; /* from += write - nbytes */ + + mov eax, r14d ; /* eax = len */ + cmp eax, ecx + jbe L_do_copy ; /* if (nbytes >= len) */ + + sub eax, ecx ; /* len -= nbytes */ + rep movsb + mov rsi, rdi + sub rsi, r15 ; /* from = out - dist */ + jmp L_do_copy ; /* if (nbytes >= len) */ + +ALIGN 4 +L_do_copy: + mov ecx, eax ; /* ecx = len */ + rep movsb + + mov rsi, r8 ; /* move in back to %esi, toss from */ + jmp L_while_test + +L_test_for_end_of_block: + test al, 32 + jz L_invalid_literal_length_code + mov dword ptr [rsp+116], 1 + jmp L_break_loop_with_status + +L_invalid_literal_length_code: + mov dword ptr [rsp+116], 2 + jmp L_break_loop_with_status + +L_invalid_distance_code: + mov dword ptr [rsp+116], 3 + jmp L_break_loop_with_status + +L_invalid_distance_too_far: + mov dword ptr [rsp+116], 4 + jmp L_break_loop_with_status + +L_break_loop: + mov dword ptr [rsp+116], 0 + +L_break_loop_with_status: +; /* put in, out, bits, and hold back into ar and pop esp */ + mov [rsp+16], rsi ; /* in */ + mov [rsp+32], rdi ; /* out */ + mov [rsp+88], ebx ; /* bits */ + mov [rsp+80], rdx ; /* hold */ + + mov rax, [rsp] ; /* restore rbp and 
rsp */ + mov rbp, [rsp+8] + mov rsp, rax + + + + mov rsi,[rsp-8] + mov rdi,[rsp-16] + mov r12,[rsp-24] + mov r13,[rsp-32] + mov r14,[rsp-40] + mov r15,[rsp-48] + mov rbx,[rsp-56] + + ret 0 +; : +; : "m" (ar) +; : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", +; "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" +; ); + +inffas8664fnc ENDP +;_TEXT ENDS +END Added: external/zlib/contrib/masmx64/inffasx64.obj ============================================================================== Binary file. No diff available. Added: external/zlib/contrib/masmx64/readme.txt ============================================================================== --- (empty file) +++ external/zlib/contrib/masmx64/readme.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,28 @@ +Summary +------- +This directory contains ASM implementations of the functions +longest_match() and inflate_fast(), for 64 bits x86 (both AMD64 and Intel EM64t), +for use with Microsoft Macro Assembler (x64) for AMD64 and Microsoft C++ 64 bits. + +gvmat64.asm is written by Gilles Vollant (2005), by using Brian Raiter 686/32 bits + assembly optimized version from Jean-loup Gailly original longest_match function + +inffasx64.asm and inffas8664.c were written by Chris Anderson, by optimizing + original function from Mark Adler + +Use instructions +---------------- +Copy these files into the zlib source directory. + +define ASMV and ASMINF in your project. Include inffas8664.c in your source tree, +and inffasx64.obj and gvmat64.obj as object to link. 
+ + +Build instructions +------------------ +run bld_64.bat with Microsoft Macro Assembler (x64) for AMD64 (ml64.exe) + +ml64.exe is given with Visual Studio 2005, Windows 2003 server DDK + +You can get Windows 2003 server DDK with ml64 and cl for AMD64 from + http://www.microsoft.com/whdc/devtools/ddk/default.mspx for a low price Added: external/zlib/contrib/masmx86/bld_ml32.bat ============================================================================== --- (empty file) +++ external/zlib/contrib/masmx86/bld_ml32.bat Tue Jan 3 07:42:59 2006 @@ -0,0 +1,2 @@ +ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm +ml /coff /Zi /c /Flinffas32.lst inffas32.asm Added: external/zlib/contrib/masmx86/gvmat32.asm ============================================================================== --- (empty file) +++ external/zlib/contrib/masmx86/gvmat32.asm Tue Jan 3 07:42:59 2006 @@ -0,0 +1,972 @@ +; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86 +; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. +; File written by Gilles Vollant, by modifying the longest_match +; from Jean-loup Gailly in deflate.c +; +; http://www.zlib.net +; http://www.winimage.com/zLibDll +; http://www.muppetlabs.com/~breadbox/software/assembly.html +; +; For Visual C++ 4.x and higher and ML 6.x and higher +; ml.exe is in directory \MASM611C of Win95 DDK +; ml.exe is also distributed in http://www.masm32.com/masmdl.htm +; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/ +; +; this file contains two implementations of longest_match +; +; longest_match_7fff : written 1996 by Gilles Vollant optimized for +; first Pentium.
Assume s->w_mask == 0x7fff +; longest_match_686 : written by Brian Raiter (1998), optimized for Pentium Pro +; +; for using an assembly version of longest_match, you need to define ASMV in the project +; There are two ways of using gvmat32.asm +; +; A) Suggested method +; if you want to include both longest_match_7fff and longest_match_686 +; compile the asm file running +; ml /coff /Zi /Flgvmat32.lst /c gvmat32.asm +; and include gvmat32c.c in your project +; if you have an old cpu (386,486 or first Pentium) and s->w_mask==0x7fff, +; longest_match_7fff will be used +; if you have a more modern CPU (Pentium Pro, II and higher) +; longest_match_686 will be used +; on old cpu with s->w_mask!=0x7fff, longest_match_686 will be used, +; but this is not a situation you'll find often +; +; B) Alternative +; if you are not interested in old cpu performance and want the smallest +; binaries possible +; +; compile the asm file running +; ml /coff /Zi /c /Flgvmat32.lst /DNOOLDPENTIUMCODE gvmat32.asm +; and do not include gvmat32c.c in your project (or also define +; NOOLDPENTIUMCODE) +; +; note : as far as I know, longest_match_686 is much faster than longest_match_7fff +; on pentium Pro/II/III, faster (but less) in P4, but it seems +; longest_match_7fff can be faster (very very little) on AMD Athlon64/K8 +; +; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2 + +;uInt longest_match_7fff(s, cur_match) +; deflate_state *s; +; IPos cur_match; /* current match */ + + NbStack equ 76 + cur_match equ dword ptr[esp+NbStack-0] + str_s equ dword ptr[esp+NbStack-4] +; 5 dword on top (ret,ebp,esi,edi,ebx) + adrret equ dword ptr[esp+NbStack-8] + pushebp equ dword ptr[esp+NbStack-12] + pushedi equ dword ptr[esp+NbStack-16] + pushesi equ dword ptr[esp+NbStack-20] + pushebx equ dword ptr[esp+NbStack-24] + + chain_length equ dword ptr [esp+NbStack-28] + limit equ dword ptr [esp+NbStack-32] + best_len equ dword ptr [esp+NbStack-36] + window equ dword ptr [esp+NbStack-40] + prev equ dword
ptr [esp+NbStack-44] + scan_start equ word ptr [esp+NbStack-48] + wmask equ dword ptr [esp+NbStack-52] + match_start_ptr equ dword ptr [esp+NbStack-56] + nice_match equ dword ptr [esp+NbStack-60] + scan equ dword ptr [esp+NbStack-64] + + windowlen equ dword ptr [esp+NbStack-68] + match_start equ dword ptr [esp+NbStack-72] + strend equ dword ptr [esp+NbStack-76] + NbStackAdd equ (NbStack-24) + + .386p + + name gvmatch + .MODEL FLAT + + + +; all the +zlib1222add offsets are due to the addition of fields +; in zlib in the deflate_state structure since the asm code was first written +; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). +; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). +; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). + + zlib1222add equ 8 + +; Note : these value are good with a 8 bytes boundary pack structure + dep_chain_length equ 74h+zlib1222add + dep_window equ 30h+zlib1222add + dep_strstart equ 64h+zlib1222add + dep_prev_length equ 70h+zlib1222add + dep_nice_match equ 88h+zlib1222add + dep_w_size equ 24h+zlib1222add + dep_prev equ 38h+zlib1222add + dep_w_mask equ 2ch+zlib1222add + dep_good_match equ 84h+zlib1222add + dep_match_start equ 68h+zlib1222add + dep_lookahead equ 6ch+zlib1222add + + +_TEXT segment + +IFDEF NOUNDERLINE + IFDEF NOOLDPENTIUMCODE + public longest_match + public match_init + ELSE + public longest_match_7fff + public cpudetect32 + public longest_match_686 + ENDIF +ELSE + IFDEF NOOLDPENTIUMCODE + public _longest_match + public _match_init + ELSE + public _longest_match_7fff + public _cpudetect32 + public _longest_match_686 + ENDIF +ENDIF + + MAX_MATCH equ 258 + MIN_MATCH equ 3 + MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) + + + +IFNDEF NOOLDPENTIUMCODE +IFDEF NOUNDERLINE +longest_match_7fff proc near +ELSE +_longest_match_7fff proc near +ENDIF + + mov edx,[esp+4] + + + + push ebp + push edi + push esi + push ebx + + sub esp,NbStackAdd + +; initialize or 
check the variables used in match.asm. + mov ebp,edx + +; chain_length = s->max_chain_length +; if (prev_length>=good_match) chain_length >>= 2 + mov edx,[ebp+dep_chain_length] + mov ebx,[ebp+dep_prev_length] + cmp [ebp+dep_good_match],ebx + ja noshr + shr edx,2 +noshr: +; we increment chain_length because in the asm, the --chain_lenght is in the beginning of the loop + inc edx + mov edi,[ebp+dep_nice_match] + mov chain_length,edx + mov eax,[ebp+dep_lookahead] + cmp eax,edi +; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + jae nolookaheadnicematch + mov edi,eax +nolookaheadnicematch: +; best_len = s->prev_length + mov best_len,ebx + +; window = s->window + mov esi,[ebp+dep_window] + mov ecx,[ebp+dep_strstart] + mov window,esi + + mov nice_match,edi +; scan = window + strstart + add esi,ecx + mov scan,esi +; dx = *window + mov dx,word ptr [esi] +; bx = *(window+best_len-1) + mov bx,word ptr [esi+ebx-1] + add esi,MAX_MATCH-1 +; scan_start = *scan + mov scan_start,dx +; strend = scan + MAX_MATCH-1 + mov strend,esi +; bx = scan_end = *(window+best_len-1) + +; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? 
+; s->strstart - (IPos)MAX_DIST(s) : NIL; + + mov esi,[ebp+dep_w_size] + sub esi,MIN_LOOKAHEAD +; here esi = MAX_DIST(s) + sub ecx,esi + ja nodist + xor ecx,ecx +nodist: + mov limit,ecx + +; prev = s->prev + mov edx,[ebp+dep_prev] + mov prev,edx + +; + mov edx,dword ptr [ebp+dep_match_start] + mov bp,scan_start + mov eax,cur_match + mov match_start,edx + + mov edx,window + mov edi,edx + add edi,best_len + mov esi,prev + dec edi +; windowlen = window + best_len -1 + mov windowlen,edi + + jmp beginloop2 + align 4 + +; here, in the loop +; eax = ax = cur_match +; ecx = limit +; bx = scan_end +; bp = scan_start +; edi = windowlen (window + best_len -1) +; esi = prev + + +;// here; chain_length <=16 +normalbeg0add16: + add chain_length,16 + jz exitloop +normalbeg0: + cmp word ptr[edi+eax],bx + je normalbeg2noroll +rcontlabnoroll: +; cur_match = prev[cur_match & wmask] + and eax,7fffh + mov ax,word ptr[esi+eax*2] +; if cur_match > limit, go to exitloop + cmp ecx,eax + jnb exitloop +; if --chain_length != 0, go to exitloop + dec chain_length + jnz normalbeg0 + jmp exitloop + +normalbeg2noroll: +; if (scan_start==*(cur_match+window)) goto normalbeg2 + cmp bp,word ptr[edx+eax] + jne rcontlabnoroll + jmp normalbeg2 + +contloop3: + mov edi,windowlen + +; cur_match = prev[cur_match & wmask] + and eax,7fffh + mov ax,word ptr[esi+eax*2] +; if cur_match > limit, go to exitloop + cmp ecx,eax +jnbexitloopshort1: + jnb exitloop +; if --chain_length != 0, go to exitloop + + +; begin the main loop +beginloop2: + sub chain_length,16+1 +; if chain_length <=16, don't use the unrolled loop + jna normalbeg0add16 + +do16: + cmp word ptr[edi+eax],bx + je normalbeg2dc0 + +maccn MACRO lab + and eax,7fffh + mov ax,word ptr[esi+eax*2] + cmp ecx,eax + jnb exitloop + cmp word ptr[edi+eax],bx + je lab + ENDM + +rcontloop0: + maccn normalbeg2dc1 + +rcontloop1: + maccn normalbeg2dc2 + +rcontloop2: + maccn normalbeg2dc3 + +rcontloop3: + maccn normalbeg2dc4 + +rcontloop4: + maccn normalbeg2dc5 + 
+rcontloop5: + maccn normalbeg2dc6 + +rcontloop6: + maccn normalbeg2dc7 + +rcontloop7: + maccn normalbeg2dc8 + +rcontloop8: + maccn normalbeg2dc9 + +rcontloop9: + maccn normalbeg2dc10 + +rcontloop10: + maccn short normalbeg2dc11 + +rcontloop11: + maccn short normalbeg2dc12 + +rcontloop12: + maccn short normalbeg2dc13 + +rcontloop13: + maccn short normalbeg2dc14 + +rcontloop14: + maccn short normalbeg2dc15 + +rcontloop15: + and eax,7fffh + mov ax,word ptr[esi+eax*2] + cmp ecx,eax + jnb exitloop + + sub chain_length,16 + ja do16 + jmp normalbeg0add16 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +normbeg MACRO rcontlab,valsub +; if we are here, we know that *(match+best_len-1) == scan_end + cmp bp,word ptr[edx+eax] +; if (match != scan_start) goto rcontlab + jne rcontlab +; calculate the good chain_length, and we'll compare scan and match string + add chain_length,16-valsub + jmp iseq + ENDM + + +normalbeg2dc11: + normbeg rcontloop11,11 + +normalbeg2dc12: + normbeg short rcontloop12,12 + +normalbeg2dc13: + normbeg short rcontloop13,13 + +normalbeg2dc14: + normbeg short rcontloop14,14 + +normalbeg2dc15: + normbeg short rcontloop15,15 + +normalbeg2dc10: + normbeg rcontloop10,10 + +normalbeg2dc9: + normbeg rcontloop9,9 + +normalbeg2dc8: + normbeg rcontloop8,8 + +normalbeg2dc7: + normbeg rcontloop7,7 + +normalbeg2dc6: + normbeg rcontloop6,6 + +normalbeg2dc5: + normbeg rcontloop5,5 + +normalbeg2dc4: + normbeg rcontloop4,4 + +normalbeg2dc3: + normbeg rcontloop3,3 + +normalbeg2dc2: + normbeg rcontloop2,2 + +normalbeg2dc1: + normbeg rcontloop1,1 + +normalbeg2dc0: + normbeg rcontloop0,0 + + +; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end + +normalbeg2: + mov edi,window + + cmp bp,word ptr[edi+eax] + jne contloop3 ; if *(ushf*)match != scan_start, continue + +iseq: +; if we are here, we know that *(match+best_len-1) == scan_end +; and (match == scan_start) + + mov edi,edx + mov esi,scan ; esi = scan + add edi,eax ; edi = window + cur_match = match 
+ + mov edx,[esi+3] ; compare manually dword at match+3 + xor edx,[edi+3] ; and scan +3 + + jz begincompare ; if equal, go to long compare + +; we will determine the unmatch byte and calculate len (in esi) + or dl,dl + je eq1rr + mov esi,3 + jmp trfinval +eq1rr: + or dx,dx + je eq1 + + mov esi,4 + jmp trfinval +eq1: + and edx,0ffffffh + jz eq11 + mov esi,5 + jmp trfinval +eq11: + mov esi,6 + jmp trfinval + +begincompare: + ; here we now scan and match begin same + add edi,6 + add esi,6 + mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes + repe cmpsd ; loop until mismatch + + je trfin ; go to trfin if not unmatch +; we determine the unmatch byte + sub esi,4 + mov edx,[edi-4] + xor edx,[esi] + + or dl,dl + jnz trfin + inc esi + + or dx,dx + jnz trfin + inc esi + + and edx,0ffffffh + jnz trfin + inc esi + +trfin: + sub esi,scan ; esi = len +trfinval: +; here we have finised compare, and esi contain len of equal string + cmp esi,best_len ; if len > best_len, go newbestlen + ja short newbestlen +; now we restore edx, ecx and esi, for the big loop + mov esi,prev + mov ecx,limit + mov edx,window + jmp contloop3 + +newbestlen: + mov best_len,esi ; len become best_len + + mov match_start,eax ; save new position as match_start + cmp esi,nice_match ; if best_len >= nice_match, exit + jae exitloop + mov ecx,scan + mov edx,window ; restore edx=window + add ecx,esi + add esi,edx + + dec esi + mov windowlen,esi ; windowlen = window + best_len-1 + mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end + +; now we restore ecx and esi, for the big loop : + mov esi,prev + mov ecx,limit + jmp contloop3 + +exitloop: +; exit : s->match_start=match_start + mov ebx,match_start + mov ebp,str_s + mov ecx,best_len + mov dword ptr [ebp+dep_match_start],ebx + mov eax,dword ptr [ebp+dep_lookahead] + cmp ecx,eax + ja minexlo + mov eax,ecx +minexlo: +; return min(best_len,s->lookahead) + +; restore stack and register ebx,esi,edi,ebp + add esp,NbStackAdd + + pop ebx + pop esi + pop edi + 
pop ebp + ret +InfoAuthor: +; please don't remove this string ! +; You are free to use gvmat32 in any free or commercial apps if you don't remove the string from the binary! + db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah + + + +IFDEF NOUNDERLINE +longest_match_7fff endp +ELSE +_longest_match_7fff endp +ENDIF + + +IFDEF NOUNDERLINE +cpudetect32 proc near +ELSE +_cpudetect32 proc near +ENDIF + + push ebx + + pushfd ; push original EFLAGS + pop eax ; get original EFLAGS + mov ecx, eax ; save original EFLAGS + xor eax, 40000h ; flip AC bit in EFLAGS + push eax ; save new EFLAGS value on stack + popfd ; replace current EFLAGS value + pushfd ; get new EFLAGS + pop eax ; store new EFLAGS in EAX + xor eax, ecx ; can't toggle AC bit, processor=80386 + jz end_cpu_is_386 ; jump if 80386 processor + push ecx + popfd ; restore AC bit in EFLAGS first + + pushfd + pushfd + pop ecx + + mov eax, ecx ; get original EFLAGS + xor eax, 200000h ; flip ID bit in EFLAGS + push eax ; save new EFLAGS value on stack + popfd ; replace current EFLAGS value + pushfd ; get new EFLAGS + pop eax ; store new EFLAGS in EAX + popfd ; restore original EFLAGS + xor eax, ecx ; can't toggle ID bit, + je is_old_486 ; processor=old + + mov eax,1 + db 0fh,0a2h ;CPUID + +exitcpudetect: + pop ebx + ret + +end_cpu_is_386: + mov eax,0300h + jmp exitcpudetect + +is_old_486: + mov eax,0400h + jmp exitcpudetect + +IFDEF NOUNDERLINE +cpudetect32 endp +ELSE +_cpudetect32 endp +ENDIF +ENDIF + +MAX_MATCH equ 258 +MIN_MATCH equ 3 +MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) +MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h) + + +;;; stack frame offsets + +chainlenwmask equ esp + 0 ; high word: current chain len + ; low word: s->wmask +window equ esp + 4 ; local copy of s->window +windowbestlen equ esp + 8 ; s->window + bestlen +scanstart equ esp + 16 ; first two bytes of string +scanend equ esp + 12 ; last two bytes of string +scanalign equ esp + 20 ; dword-misalignment of string
+nicematch equ esp + 24 ; a good enough match size +bestlen equ esp + 28 ; size of best match so far +scan equ esp + 32 ; ptr to string wanting match + +LocalVarsSize equ 36 +; saved ebx byte esp + 36 +; saved edi byte esp + 40 +; saved esi byte esp + 44 +; saved ebp byte esp + 48 +; return address byte esp + 52 +deflatestate equ esp + 56 ; the function arguments +curmatch equ esp + 60 + +;;; Offsets for fields in the deflate_state structure. These numbers +;;; are calculated from the definition of deflate_state, with the +;;; assumption that the compiler will dword-align the fields. (Thus, +;;; changing the definition of deflate_state could easily cause this +;;; program to crash horribly, without so much as a warning at +;;; compile time. Sigh.) + +dsWSize equ 36+zlib1222add +dsWMask equ 44+zlib1222add +dsWindow equ 48+zlib1222add +dsPrev equ 56+zlib1222add +dsMatchLen equ 88+zlib1222add +dsPrevMatch equ 92+zlib1222add +dsStrStart equ 100+zlib1222add +dsMatchStart equ 104+zlib1222add +dsLookahead equ 108+zlib1222add +dsPrevLen equ 112+zlib1222add +dsMaxChainLen equ 116+zlib1222add +dsGoodMatch equ 132+zlib1222add +dsNiceMatch equ 136+zlib1222add + + +;;; match.asm -- Pentium-Pro-optimized version of longest_match() +;;; Written for zlib 1.1.2 +;;; Copyright (C) 1998 Brian Raiter +;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html +;;; +;;; This is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License. + +;GLOBAL _longest_match, _match_init + + +;SECTION .text + +;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) + +;_longest_match: +IFDEF NOOLDPENTIUMCODE + IFDEF NOUNDERLINE + longest_match proc near + ELSE + _longest_match proc near + ENDIF +ELSE + IFDEF NOUNDERLINE + longest_match_686 proc near + ELSE + _longest_match_686 proc near + ENDIF +ENDIF + +;;; Save registers that the compiler may be using, and adjust esp to +;;; make room for our stack frame. 
+ + push ebp + push edi + push esi + push ebx + sub esp, LocalVarsSize + +;;; Retrieve the function arguments. ecx will hold cur_match +;;; throughout the entire function. edx will hold the pointer to the +;;; deflate_state structure during the function's setup (before +;;; entering the main loop. + + mov edx, [deflatestate] + mov ecx, [curmatch] + +;;; uInt wmask = s->w_mask; +;;; unsigned chain_length = s->max_chain_length; +;;; if (s->prev_length >= s->good_match) { +;;; chain_length >>= 2; +;;; } + + mov eax, [edx + dsPrevLen] + mov ebx, [edx + dsGoodMatch] + cmp eax, ebx + mov eax, [edx + dsWMask] + mov ebx, [edx + dsMaxChainLen] + jl LastMatchGood + shr ebx, 2 +LastMatchGood: + +;;; chainlen is decremented once beforehand so that the function can +;;; use the sign flag instead of the zero flag for the exit test. +;;; It is then shifted into the high word, to make room for the wmask +;;; value, which it will always accompany. + + dec ebx + shl ebx, 16 + or ebx, eax + mov [chainlenwmask], ebx + +;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + mov eax, [edx + dsNiceMatch] + mov ebx, [edx + dsLookahead] + cmp ebx, eax + jl LookaheadLess + mov ebx, eax +LookaheadLess: mov [nicematch], ebx + +;;; register Bytef *scan = s->window + s->strstart; + + mov esi, [edx + dsWindow] + mov [window], esi + mov ebp, [edx + dsStrStart] + lea edi, [esi + ebp] + mov [scan], edi + +;;; Determine how many bytes the scan ptr is off from being +;;; dword-aligned. + + mov eax, edi + neg eax + and eax, 3 + mov [scanalign], eax + +;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? +;;; s->strstart - (IPos)MAX_DIST(s) : NIL; + + mov eax, [edx + dsWSize] + sub eax, MIN_LOOKAHEAD + sub ebp, eax + jg LimitPositive + xor ebp, ebp +LimitPositive: + +;;; int best_len = s->prev_length; + + mov eax, [edx + dsPrevLen] + mov [bestlen], eax + +;;; Store the sum of s->window + best_len in esi locally, and in esi. 
+ + add esi, eax + mov [windowbestlen], esi + +;;; register ush scan_start = *(ushf*)scan; +;;; register ush scan_end = *(ushf*)(scan+best_len-1); +;;; Posf *prev = s->prev; + + movzx ebx, word ptr [edi] + mov [scanstart], ebx + movzx ebx, word ptr [edi + eax - 1] + mov [scanend], ebx + mov edi, [edx + dsPrev] + +;;; Jump into the main loop. + + mov edx, [chainlenwmask] + jmp short LoopEntry + +align 4 + +;;; do { +;;; match = s->window + cur_match; +;;; if (*(ushf*)(match+best_len-1) != scan_end || +;;; *(ushf*)match != scan_start) continue; +;;; [...] +;;; } while ((cur_match = prev[cur_match & wmask]) > limit +;;; && --chain_length != 0); +;;; +;;; Here is the inner loop of the function. The function will spend the +;;; majority of its time in this loop, and majority of that time will +;;; be spent in the first ten instructions. +;;; +;;; Within this loop: +;;; ebx = scanend +;;; ecx = curmatch +;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) +;;; esi = windowbestlen - i.e., (window + bestlen) +;;; edi = prev +;;; ebp = limit + +LookupLoop: + and ecx, edx + movzx ecx, word ptr [edi + ecx*2] + cmp ecx, ebp + jbe LeaveNow + sub edx, 00010000h + js LeaveNow +LoopEntry: movzx eax, word ptr [esi + ecx - 1] + cmp eax, ebx + jnz LookupLoop + mov eax, [window] + movzx eax, word ptr [eax + ecx] + cmp eax, [scanstart] + jnz LookupLoop + +;;; Store the current value of chainlen. + + mov [chainlenwmask], edx + +;;; Point edi to the string under scrutiny, and esi to the string we +;;; are hoping to match it up with. In actuality, esi and edi are +;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is +;;; initialized to -(MAX_MATCH_8 - scanalign). + + mov esi, [window] + mov edi, [scan] + add esi, ecx + mov eax, [scanalign] + mov edx, 0fffffef8h; -(MAX_MATCH_8) + lea edi, [edi + eax + 0108h] ;MAX_MATCH_8] + lea esi, [esi + eax + 0108h] ;MAX_MATCH_8] + +;;; Test the strings for equality, 8 bytes at a time. 
At the end, +;;; adjust edx so that it is offset to the exact byte that mismatched. +;;; +;;; We already know at this point that the first three bytes of the +;;; strings match each other, and they can be safely passed over before +;;; starting the compare loop. So what this code does is skip over 0-3 +;;; bytes, as much as necessary in order to dword-align the edi +;;; pointer. (esi will still be misaligned three times out of four.) +;;; +;;; It should be confessed that this loop usually does not represent +;;; much of the total running time. Replacing it with a more +;;; straightforward "rep cmpsb" would not drastically degrade +;;; performance. + +LoopCmps: + mov eax, [esi + edx] + xor eax, [edi + edx] + jnz LeaveLoopCmps + mov eax, [esi + edx + 4] + xor eax, [edi + edx + 4] + jnz LeaveLoopCmps4 + add edx, 8 + jnz LoopCmps + jmp short LenMaximum +LeaveLoopCmps4: add edx, 4 +LeaveLoopCmps: test eax, 0000FFFFh + jnz LenLower + add edx, 2 + shr eax, 16 +LenLower: sub al, 1 + adc edx, 0 + +;;; Calculate the length of the match. If it is longer than MAX_MATCH, +;;; then automatically accept it as the best possible match and leave. + + lea eax, [edi + edx] + mov edi, [scan] + sub eax, edi + cmp eax, MAX_MATCH + jge LenMaximum + +;;; If the length of the match is not longer than the best match we +;;; have so far, then forget it and return to the lookup loop. 
+ + mov edx, [deflatestate] + mov ebx, [bestlen] + cmp eax, ebx + jg LongerMatch + mov esi, [windowbestlen] + mov edi, [edx + dsPrev] + mov ebx, [scanend] + mov edx, [chainlenwmask] + jmp LookupLoop + +;;; s->match_start = cur_match; +;;; best_len = len; +;;; if (len >= nice_match) break; +;;; scan_end = *(ushf*)(scan+best_len-1); + +LongerMatch: mov ebx, [nicematch] + mov [bestlen], eax + mov [edx + dsMatchStart], ecx + cmp eax, ebx + jge LeaveNow + mov esi, [window] + add esi, eax + mov [windowbestlen], esi + movzx ebx, word ptr [edi + eax - 1] + mov edi, [edx + dsPrev] + mov [scanend], ebx + mov edx, [chainlenwmask] + jmp LookupLoop + +;;; Accept the current string, with the maximum possible length. + +LenMaximum: mov edx, [deflatestate] + mov dword ptr [bestlen], MAX_MATCH + mov [edx + dsMatchStart], ecx + +;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; +;;; return s->lookahead; + +LeaveNow: + mov edx, [deflatestate] + mov ebx, [bestlen] + mov eax, [edx + dsLookahead] + cmp ebx, eax + jg LookaheadRet + mov eax, ebx +LookaheadRet: + +;;; Restore the stack and return from whence we came. + + add esp, LocalVarsSize + pop ebx + pop esi + pop edi + pop ebp + + ret +; please don't remove this string ! +; Your can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary! + db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah + + +IFDEF NOOLDPENTIUMCODE + IFDEF NOUNDERLINE + longest_match endp + ELSE + _longest_match endp + ENDIF + + IFDEF NOUNDERLINE + match_init proc near + ret + match_init endp + ELSE + _match_init proc near + ret + _match_init endp + ENDIF +ELSE + IFDEF NOUNDERLINE + longest_match_686 endp + ELSE + _longest_match_686 endp + ENDIF +ENDIF + +_TEXT ends +end Added: external/zlib/contrib/masmx86/gvmat32.obj ============================================================================== Binary file. No diff available. 
Added: external/zlib/contrib/masmx86/gvmat32c.c ============================================================================== --- (empty file) +++ external/zlib/contrib/masmx86/gvmat32c.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,62 @@ +/* gvmat32.c -- C portion of the optimized longest_match for 32 bits x86 + * Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. + * File written by Gilles Vollant, by modifiying the longest_match + * from Jean-loup Gailly in deflate.c + * it prepare all parameters and call the assembly longest_match_gvasm + * longest_match execute standard C code is wmask != 0x7fff + * (assembly code is faster with a fixed wmask) + * + * Read comment at beginning of gvmat32.asm for more information + */ + +#if defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) +#include "deflate.h" + +/* if your C compiler don't add underline before function name, + define ADD_UNDERLINE_ASMFUNC */ +#ifdef ADD_UNDERLINE_ASMFUNC +#define longest_match_7fff _longest_match_7fff +#define longest_match_686 _longest_match_686 +#define cpudetect32 _cpudetect32 +#endif + + +unsigned long cpudetect32(); + +uInt longest_match_c( + deflate_state *s, + IPos cur_match); /* current match */ + + +uInt longest_match_7fff( + deflate_state *s, + IPos cur_match); /* current match */ + +uInt longest_match_686( + deflate_state *s, + IPos cur_match); /* current match */ + + +static uInt iIsPPro=2; + +void match_init () +{ + iIsPPro = (((cpudetect32()/0x100)&0xf)>=6) ? 
1 : 0; +} + +uInt longest_match( + deflate_state *s, + IPos cur_match) /* current match */ +{ + if (iIsPPro!=0) + return longest_match_686(s,cur_match); + + if (s->w_mask != 0x7fff) + return longest_match_686(s,cur_match); + + /* now ((s->w_mask == 0x7fff) && (iIsPPro==0)) */ + return longest_match_7fff(s,cur_match); +} + + +#endif /* defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) */ Added: external/zlib/contrib/masmx86/inffas32.asm ============================================================================== --- (empty file) +++ external/zlib/contrib/masmx86/inffas32.asm Tue Jan 3 07:42:59 2006 @@ -0,0 +1,1083 @@ +;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding +; * +; * inffas32.asm is derivated from inffas86.c, with translation of assembly code +; * +; * Copyright (C) 1995-2003 Mark Adler +; * For conditions of distribution and use, see copyright notice in zlib.h +; * +; * Copyright (C) 2003 Chris Anderson +; * Please use the copyright conditions above. +; * +; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from +; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at +; * the moment. I have successfully compiled and tested this code with gcc2.96, +; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S +; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX +; * enabled. I will attempt to merge the MMX code into this version. 
Newer +; * versions of this and inffast.S can be found at +; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/ +; * +; * 2005 : modification by Gilles Vollant +; */ +; For Visual C++ 4.x and higher and ML 6.x and higher +; ml.exe is in directory \MASM611C of Win95 DDK +; ml.exe is also distributed in http://www.masm32.com/masmdl.htm +; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/ +; +; +; compile with command line option +; ml /coff /Zi /c /Flinffas32.lst inffas32.asm + +; if you define NO_GZIP (see inflate.h), compile with +; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm + + +; zlib122sup is 0 fort zlib 1.2.2.1 and lower +; zlib122sup is 8 fort zlib 1.2.2.2 and more (with addition of dmax and head +; in inflate_state in inflate.h) +zlib1222sup equ 8 + + +IFDEF GUNZIP + INFLATE_MODE_TYPE equ 11 + INFLATE_MODE_BAD equ 26 +ELSE + IFNDEF NO_GUNZIP + INFLATE_MODE_TYPE equ 11 + INFLATE_MODE_BAD equ 26 + ELSE + INFLATE_MODE_TYPE equ 3 + INFLATE_MODE_BAD equ 17 + ENDIF +ENDIF + + +; 75 "inffast.S" +;FILE "inffast.S" + +;;;GLOBAL _inflate_fast + +;;;SECTION .text + + + + .586p + .mmx + + name inflate_fast_x86 + .MODEL FLAT + +_DATA segment +inflate_fast_use_mmx: + dd 1 + + +_TEXT segment +PUBLIC _inflate_fast + +ALIGN 4 +_inflate_fast: + jmp inflate_fast_entry + + + +ALIGN 4 + db 'Fast decoding Code from Chris Anderson' + db 0 + +ALIGN 4 +invalid_literal_length_code_msg: + db 'invalid literal/length code' + db 0 + +ALIGN 4 +invalid_distance_code_msg: + db 'invalid distance code' + db 0 + +ALIGN 4 +invalid_distance_too_far_msg: + db 'invalid distance too far back' + db 0 + + +ALIGN 4 +inflate_fast_mask: +dd 0 +dd 1 +dd 3 +dd 7 +dd 15 +dd 31 +dd 63 +dd 127 +dd 255 +dd 511 +dd 1023 +dd 2047 +dd 4095 +dd 8191 +dd 16383 +dd 32767 +dd 65535 +dd 131071 +dd 262143 +dd 524287 +dd 1048575 +dd 2097151 +dd 4194303 +dd 8388607 +dd 16777215 +dd 33554431 +dd 67108863 +dd 134217727 +dd 268435455 +dd 536870911 +dd 1073741823 
+dd 2147483647 +dd 4294967295 + + +mode_state equ 0 ;/* state->mode */ +wsize_state equ (32+zlib1222sup) ;/* state->wsize */ +write_state equ (36+4+zlib1222sup) ;/* state->write */ +window_state equ (40+4+zlib1222sup) ;/* state->window */ +hold_state equ (44+4+zlib1222sup) ;/* state->hold */ +bits_state equ (48+4+zlib1222sup) ;/* state->bits */ +lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */ +distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */ +lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */ +distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */ + + +;;SECTION .text +; 205 "inffast.S" +;GLOBAL inflate_fast_use_mmx + +;SECTION .data + + +; GLOBAL inflate_fast_use_mmx:object +;.size inflate_fast_use_mmx, 4 +; 226 "inffast.S" +;SECTION .text + +ALIGN 4 +inflate_fast_entry: + push edi + push esi + push ebp + push ebx + pushfd + sub esp,64 + cld + + + + + mov esi, [esp+88] + mov edi, [esi+28] + + + + + + + + mov edx, [esi+4] + mov eax, [esi+0] + + add edx,eax + sub edx,11 + + mov [esp+44],eax + mov [esp+20],edx + + mov ebp, [esp+92] + mov ecx, [esi+16] + mov ebx, [esi+12] + + sub ebp,ecx + neg ebp + add ebp,ebx + + sub ecx,257 + add ecx,ebx + + mov [esp+60],ebx + mov [esp+40],ebp + mov [esp+16],ecx +; 285 "inffast.S" + mov eax, [edi+lencode_state] + mov ecx, [edi+distcode_state] + + mov [esp+8],eax + mov [esp+12],ecx + + mov eax,1 + mov ecx, [edi+lenbits_state] + shl eax,cl + dec eax + mov [esp+0],eax + + mov eax,1 + mov ecx, [edi+distbits_state] + shl eax,cl + dec eax + mov [esp+4],eax + + mov eax, [edi+wsize_state] + mov ecx, [edi+write_state] + mov edx, [edi+window_state] + + mov [esp+52],eax + mov [esp+48],ecx + mov [esp+56],edx + + mov ebp, [edi+hold_state] + mov ebx, [edi+bits_state] +; 321 "inffast.S" + mov esi, [esp+44] + mov ecx, [esp+20] + cmp ecx,esi + ja L_align_long + + add ecx,11 + sub ecx,esi + mov eax,12 + sub eax,ecx + lea edi, [esp+28] + rep movsb + mov ecx,eax + xor eax,eax + rep stosb + lea esi, [esp+28] + mov 
[esp+20],esi + jmp L_is_aligned + + +L_align_long: + test esi,3 + jz L_is_aligned + xor eax,eax + mov al, [esi] + inc esi + mov ecx,ebx + add ebx,8 + shl eax,cl + or ebp,eax + jmp L_align_long + +L_is_aligned: + mov edi, [esp+60] +; 366 "inffast.S" +L_check_mmx: + cmp dword ptr [inflate_fast_use_mmx],2 + je L_init_mmx + ja L_do_loop + + push eax + push ebx + push ecx + push edx + pushfd + mov eax, [esp] + xor dword ptr [esp],0200000h + + + + + popfd + pushfd + pop edx + xor edx,eax + jz L_dont_use_mmx + xor eax,eax + cpuid + cmp ebx,0756e6547h + jne L_dont_use_mmx + cmp ecx,06c65746eh + jne L_dont_use_mmx + cmp edx,049656e69h + jne L_dont_use_mmx + mov eax,1 + cpuid + shr eax,8 + and eax,15 + cmp eax,6 + jne L_dont_use_mmx + test edx,0800000h + jnz L_use_mmx + jmp L_dont_use_mmx +L_use_mmx: + mov dword ptr [inflate_fast_use_mmx],2 + jmp L_check_mmx_pop +L_dont_use_mmx: + mov dword ptr [inflate_fast_use_mmx],3 +L_check_mmx_pop: + pop edx + pop ecx + pop ebx + pop eax + jmp L_check_mmx +; 426 "inffast.S" +ALIGN 4 +L_do_loop: +; 437 "inffast.S" + cmp bl,15 + ja L_get_length_code + + xor eax,eax + lodsw + mov cl,bl + add bl,16 + shl eax,cl + or ebp,eax + +L_get_length_code: + mov edx, [esp+0] + mov ecx, [esp+8] + and edx,ebp + mov eax, [ecx+edx*4] + +L_dolen: + + + + + + + mov cl,ah + sub bl,ah + shr ebp,cl + + + + + + + test al,al + jnz L_test_for_length_base + + shr eax,16 + stosb + +L_while_test: + + + cmp [esp+16],edi + jbe L_break_loop + + cmp [esp+20],esi + ja L_do_loop + jmp L_break_loop + +L_test_for_length_base: +; 502 "inffast.S" + mov edx,eax + shr edx,16 + mov cl,al + + test al,16 + jz L_test_for_second_level_length + and cl,15 + jz L_save_len + cmp bl,cl + jae L_add_bits_to_len + + mov ch,cl + xor eax,eax + lodsw + mov cl,bl + add bl,16 + shl eax,cl + or ebp,eax + mov cl,ch + +L_add_bits_to_len: + mov eax,1 + shl eax,cl + dec eax + sub bl,cl + and eax,ebp + shr ebp,cl + add edx,eax + +L_save_len: + mov [esp+24],edx + + +L_decode_distance: +; 549 
"inffast.S" + cmp bl,15 + ja L_get_distance_code + + xor eax,eax + lodsw + mov cl,bl + add bl,16 + shl eax,cl + or ebp,eax + +L_get_distance_code: + mov edx, [esp+4] + mov ecx, [esp+12] + and edx,ebp + mov eax, [ecx+edx*4] + + +L_dodist: + mov edx,eax + shr edx,16 + mov cl,ah + sub bl,ah + shr ebp,cl +; 584 "inffast.S" + mov cl,al + + test al,16 + jz L_test_for_second_level_dist + and cl,15 + jz L_check_dist_one + cmp bl,cl + jae L_add_bits_to_dist + + mov ch,cl + xor eax,eax + lodsw + mov cl,bl + add bl,16 + shl eax,cl + or ebp,eax + mov cl,ch + +L_add_bits_to_dist: + mov eax,1 + shl eax,cl + dec eax + sub bl,cl + and eax,ebp + shr ebp,cl + add edx,eax + jmp L_check_window + +L_check_window: +; 625 "inffast.S" + mov [esp+44],esi + mov eax,edi + sub eax, [esp+40] + + cmp eax,edx + jb L_clip_window + + mov ecx, [esp+24] + mov esi,edi + sub esi,edx + + sub ecx,3 + mov al, [esi] + mov [edi],al + mov al, [esi+1] + mov dl, [esi+2] + add esi,3 + mov [edi+1],al + mov [edi+2],dl + add edi,3 + rep movsb + + mov esi, [esp+44] + jmp L_while_test + +ALIGN 4 +L_check_dist_one: + cmp edx,1 + jne L_check_window + cmp [esp+40],edi + je L_check_window + + dec edi + mov ecx, [esp+24] + mov al, [edi] + sub ecx,3 + + mov [edi+1],al + mov [edi+2],al + mov [edi+3],al + add edi,4 + rep stosb + + jmp L_while_test + +ALIGN 4 +L_test_for_second_level_length: + + + + + test al,64 + jnz L_test_for_end_of_block + + mov eax,1 + shl eax,cl + dec eax + and eax,ebp + add eax,edx + mov edx, [esp+8] + mov eax, [edx+eax*4] + jmp L_dolen + +ALIGN 4 +L_test_for_second_level_dist: + + + + + test al,64 + jnz L_invalid_distance_code + + mov eax,1 + shl eax,cl + dec eax + and eax,ebp + add eax,edx + mov edx, [esp+12] + mov eax, [edx+eax*4] + jmp L_dodist + +ALIGN 4 +L_clip_window: +; 721 "inffast.S" + mov ecx,eax + mov eax, [esp+52] + neg ecx + mov esi, [esp+56] + + cmp eax,edx + jb L_invalid_distance_too_far + + add ecx,edx + cmp dword ptr [esp+48],0 + jne L_wrap_around_window + + sub eax,ecx + add 
esi,eax +; 749 "inffast.S" + mov eax, [esp+24] + cmp eax,ecx + jbe L_do_copy1 + + sub eax,ecx + rep movsb + mov esi,edi + sub esi,edx + jmp L_do_copy1 + + cmp eax,ecx + jbe L_do_copy1 + + sub eax,ecx + rep movsb + mov esi,edi + sub esi,edx + jmp L_do_copy1 + +L_wrap_around_window: +; 793 "inffast.S" + mov eax, [esp+48] + cmp ecx,eax + jbe L_contiguous_in_window + + add esi, [esp+52] + add esi,eax + sub esi,ecx + sub ecx,eax + + + mov eax, [esp+24] + cmp eax,ecx + jbe L_do_copy1 + + sub eax,ecx + rep movsb + mov esi, [esp+56] + mov ecx, [esp+48] + cmp eax,ecx + jbe L_do_copy1 + + sub eax,ecx + rep movsb + mov esi,edi + sub esi,edx + jmp L_do_copy1 + +L_contiguous_in_window: +; 836 "inffast.S" + add esi,eax + sub esi,ecx + + + mov eax, [esp+24] + cmp eax,ecx + jbe L_do_copy1 + + sub eax,ecx + rep movsb + mov esi,edi + sub esi,edx + +L_do_copy1: +; 862 "inffast.S" + mov ecx,eax + rep movsb + + mov esi, [esp+44] + jmp L_while_test +; 878 "inffast.S" +ALIGN 4 +L_init_mmx: + emms + + + + + + movd mm0,ebp + mov ebp,ebx +; 896 "inffast.S" + movd mm4,[esp+0] + movq mm3,mm4 + movd mm5,[esp+4] + movq mm2,mm5 + pxor mm1,mm1 + mov ebx, [esp+8] + jmp L_do_loop_mmx + +ALIGN 4 +L_do_loop_mmx: + psrlq mm0,mm1 + + cmp ebp,32 + ja L_get_length_code_mmx + + movd mm6,ebp + movd mm7,[esi] + add esi,4 + psllq mm7,mm6 + add ebp,32 + por mm0,mm7 + +L_get_length_code_mmx: + pand mm4,mm0 + movd eax,mm4 + movq mm4,mm3 + mov eax, [ebx+eax*4] + +L_dolen_mmx: + movzx ecx,ah + movd mm1,ecx + sub ebp,ecx + + test al,al + jnz L_test_for_length_base_mmx + + shr eax,16 + stosb + +L_while_test_mmx: + + + cmp [esp+16],edi + jbe L_break_loop + + cmp [esp+20],esi + ja L_do_loop_mmx + jmp L_break_loop + +L_test_for_length_base_mmx: + + mov edx,eax + shr edx,16 + + test al,16 + jz L_test_for_second_level_length_mmx + and eax,15 + jz L_decode_distance_mmx + + psrlq mm0,mm1 + movd mm1,eax + movd ecx,mm0 + sub ebp,eax + and ecx, [inflate_fast_mask+eax*4] + add edx,ecx + +L_decode_distance_mmx: + psrlq mm0,mm1 
+ + cmp ebp,32 + ja L_get_dist_code_mmx + + movd mm6,ebp + movd mm7,[esi] + add esi,4 + psllq mm7,mm6 + add ebp,32 + por mm0,mm7 + +L_get_dist_code_mmx: + mov ebx, [esp+12] + pand mm5,mm0 + movd eax,mm5 + movq mm5,mm2 + mov eax, [ebx+eax*4] + +L_dodist_mmx: + + movzx ecx,ah + mov ebx,eax + shr ebx,16 + sub ebp,ecx + movd mm1,ecx + + test al,16 + jz L_test_for_second_level_dist_mmx + and eax,15 + jz L_check_dist_one_mmx + +L_add_bits_to_dist_mmx: + psrlq mm0,mm1 + movd mm1,eax + movd ecx,mm0 + sub ebp,eax + and ecx, [inflate_fast_mask+eax*4] + add ebx,ecx + +L_check_window_mmx: + mov [esp+44],esi + mov eax,edi + sub eax, [esp+40] + + cmp eax,ebx + jb L_clip_window_mmx + + mov ecx,edx + mov esi,edi + sub esi,ebx + + sub ecx,3 + mov al, [esi] + mov [edi],al + mov al, [esi+1] + mov dl, [esi+2] + add esi,3 + mov [edi+1],al + mov [edi+2],dl + add edi,3 + rep movsb + + mov esi, [esp+44] + mov ebx, [esp+8] + jmp L_while_test_mmx + +ALIGN 4 +L_check_dist_one_mmx: + cmp ebx,1 + jne L_check_window_mmx + cmp [esp+40],edi + je L_check_window_mmx + + dec edi + mov ecx,edx + mov al, [edi] + sub ecx,3 + + mov [edi+1],al + mov [edi+2],al + mov [edi+3],al + add edi,4 + rep stosb + + mov ebx, [esp+8] + jmp L_while_test_mmx + +ALIGN 4 +L_test_for_second_level_length_mmx: + test al,64 + jnz L_test_for_end_of_block + + and eax,15 + psrlq mm0,mm1 + movd ecx,mm0 + and ecx, [inflate_fast_mask+eax*4] + add ecx,edx + mov eax, [ebx+ecx*4] + jmp L_dolen_mmx + +ALIGN 4 +L_test_for_second_level_dist_mmx: + test al,64 + jnz L_invalid_distance_code + + and eax,15 + psrlq mm0,mm1 + movd ecx,mm0 + and ecx, [inflate_fast_mask+eax*4] + mov eax, [esp+12] + add ecx,ebx + mov eax, [eax+ecx*4] + jmp L_dodist_mmx + +ALIGN 4 +L_clip_window_mmx: + + mov ecx,eax + mov eax, [esp+52] + neg ecx + mov esi, [esp+56] + + cmp eax,ebx + jb L_invalid_distance_too_far + + add ecx,ebx + cmp dword ptr [esp+48],0 + jne L_wrap_around_window_mmx + + sub eax,ecx + add esi,eax + + cmp edx,ecx + jbe L_do_copy1_mmx + + sub 
edx,ecx + rep movsb + mov esi,edi + sub esi,ebx + jmp L_do_copy1_mmx + + cmp edx,ecx + jbe L_do_copy1_mmx + + sub edx,ecx + rep movsb + mov esi,edi + sub esi,ebx + jmp L_do_copy1_mmx + +L_wrap_around_window_mmx: + + mov eax, [esp+48] + cmp ecx,eax + jbe L_contiguous_in_window_mmx + + add esi, [esp+52] + add esi,eax + sub esi,ecx + sub ecx,eax + + + cmp edx,ecx + jbe L_do_copy1_mmx + + sub edx,ecx + rep movsb + mov esi, [esp+56] + mov ecx, [esp+48] + cmp edx,ecx + jbe L_do_copy1_mmx + + sub edx,ecx + rep movsb + mov esi,edi + sub esi,ebx + jmp L_do_copy1_mmx + +L_contiguous_in_window_mmx: + + add esi,eax + sub esi,ecx + + + cmp edx,ecx + jbe L_do_copy1_mmx + + sub edx,ecx + rep movsb + mov esi,edi + sub esi,ebx + +L_do_copy1_mmx: + + + mov ecx,edx + rep movsb + + mov esi, [esp+44] + mov ebx, [esp+8] + jmp L_while_test_mmx +; 1174 "inffast.S" +L_invalid_distance_code: + + + + + + mov ecx, invalid_distance_code_msg + mov edx,INFLATE_MODE_BAD + jmp L_update_stream_state + +L_test_for_end_of_block: + + + + + + test al,32 + jz L_invalid_literal_length_code + + mov ecx,0 + mov edx,INFLATE_MODE_TYPE + jmp L_update_stream_state + +L_invalid_literal_length_code: + + + + + + mov ecx, invalid_literal_length_code_msg + mov edx,INFLATE_MODE_BAD + jmp L_update_stream_state + +L_invalid_distance_too_far: + + + + mov esi, [esp+44] + mov ecx, invalid_distance_too_far_msg + mov edx,INFLATE_MODE_BAD + jmp L_update_stream_state + +L_update_stream_state: + + mov eax, [esp+88] + test ecx,ecx + jz L_skip_msg + mov [eax+24],ecx +L_skip_msg: + mov eax, [eax+28] + mov [eax+mode_state],edx + jmp L_break_loop + +ALIGN 4 +L_break_loop: +; 1243 "inffast.S" + cmp dword ptr [inflate_fast_use_mmx],2 + jne L_update_next_in + + + + mov ebx,ebp + +L_update_next_in: +; 1266 "inffast.S" + mov eax, [esp+88] + mov ecx,ebx + mov edx, [eax+28] + shr ecx,3 + sub esi,ecx + shl ecx,3 + sub ebx,ecx + mov [eax+12],edi + mov [edx+bits_state],ebx + mov ecx,ebx + + lea ebx, [esp+28] + cmp [esp+20],ebx + jne 
L_buf_not_used + + sub esi,ebx + mov ebx, [eax+0] + mov [esp+20],ebx + add esi,ebx + mov ebx, [eax+4] + sub ebx,11 + add [esp+20],ebx + +L_buf_not_used: + mov [eax+0],esi + + mov ebx,1 + shl ebx,cl + dec ebx + + + + + + cmp dword ptr [inflate_fast_use_mmx],2 + jne L_update_hold + + + + psrlq mm0,mm1 + movd ebp,mm0 + + emms + +L_update_hold: + + + + and ebp,ebx + mov [edx+hold_state],ebp + + + + + mov ebx, [esp+20] + cmp ebx,esi + jbe L_last_is_smaller + + sub ebx,esi + add ebx,11 + mov [eax+4],ebx + jmp L_fixup_out +L_last_is_smaller: + sub esi,ebx + neg esi + add esi,11 + mov [eax+4],esi + + + + +L_fixup_out: + + mov ebx, [esp+16] + cmp ebx,edi + jbe L_end_is_smaller + + sub ebx,edi + add ebx,257 + mov [eax+16],ebx + jmp L_done +L_end_is_smaller: + sub edi,ebx + neg edi + add edi,257 + mov [eax+16],edi + + + + + +L_done: + add esp,64 + popfd + pop ebx + pop ebp + pop esi + pop edi + ret + +_TEXT ends +end Added: external/zlib/contrib/masmx86/inffas32.obj ============================================================================== Binary file. No diff available. Added: external/zlib/contrib/masmx86/mkasm.bat ============================================================================== --- (empty file) +++ external/zlib/contrib/masmx86/mkasm.bat Tue Jan 3 07:42:59 2006 @@ -0,0 +1,3 @@ +cl /DASMV /I..\.. /O2 /c gvmat32c.c +ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm +ml /coff /Zi /c /Flinffas32.lst inffas32.asm Added: external/zlib/contrib/masmx86/readme.txt ============================================================================== --- (empty file) +++ external/zlib/contrib/masmx86/readme.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,21 @@ + +Summary +------- +This directory contains ASM implementations of the functions +longest_match() and inflate_fast(). + + +Use instructions +---------------- +Copy these files into the zlib source directory, then run the +appropriate makefile, as suggested below. 
+ + +Build instructions +------------------ +* With Microsoft C and MASM: +nmake -f win32/Makefile.msc LOC="-DASMV -DASMINF" OBJA="gvmat32c.obj gvmat32.obj inffas32.obj" + +* With Borland C and TASM: +make -f win32/Makefile.bor LOCAL_ZLIB="-DASMV -DASMINF" OBJA="gvmat32c.obj gvmat32.obj inffas32.obj" OBJPA="+gvmat32c.obj+gvmat32.obj+inffas32.obj" + Added: external/zlib/contrib/minizip/ChangeLogUnzip ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/ChangeLogUnzip Tue Jan 3 07:42:59 2006 @@ -0,0 +1,67 @@ +Change in 1.01e (12 feb 05) +- Fix in zipOpen2 for globalcomment (Rolf Kalbermatter) +- Fix possible memory leak in unzip.c (Zoran Stevanovic) + +Change in 1.01b (20 may 04) +- Integrate patch from Debian package (submited by Mark Brown) +- Add tools mztools from Xavier Roche + +Change in 1.01 (8 may 04) +- fix buffer overrun risk in unzip.c (Xavier Roche) +- fix a minor buffer insecurity in minizip.c (Mike Whittaker) + +Change in 1.00: (10 sept 03) +- rename to 1.00 +- cosmetic code change + +Change in 0.22: (19 May 03) +- crypting support (unless you define NOCRYPT) +- append file in existing zipfile + +Change in 0.21: (10 Mar 03) +- bug fixes + +Change in 0.17: (27 Jan 02) +- bug fixes + +Change in 0.16: (19 Jan 02) +- Support of ioapi for virtualize zip file access + +Change in 0.15: (19 Mar 98) +- fix memory leak in minizip.c + +Change in 0.14: (10 Mar 98) +- fix bugs in minizip.c sample for zipping big file +- fix problem in month in date handling +- fix bug in unzlocal_GetCurrentFileInfoInternal in unzip.c for + comment handling + +Change in 0.13: (6 Mar 98) +- fix bugs in zip.c +- add real minizip sample + +Change in 0.12: (4 Mar 98) +- add zip.c and zip.h for creates .zip file +- fix change_file_date in miniunz.c for Unix (Jean-loup Gailly) +- fix miniunz.c for file without specific record for directory + +Change in 0.11: (3 Mar 98) +- fix bug in unzGetCurrentFileInfo for get 
extra field and comment +- enhance miniunz sample, remove the bad unztst.c sample + +Change in 0.10: (2 Mar 98) +- fix bug in unzReadCurrentFile +- rename unzip* to unz* function and structure +- remove Windows-like hungary notation variable name +- modify some structure in unzip.h +- add somes comment in source +- remove unzipGetcCurrentFile function +- replace ZUNZEXPORT by ZEXPORT +- add unzGetLocalExtrafield for get the local extrafield info +- add a new sample, miniunz.c + +Change in 0.4: (25 Feb 98) +- suppress the type unzipFileInZip. + Only on file in the zipfile can be open at the same time +- fix somes typo in code +- added tm_unz structure in unzip_file_info (date/time in readable format) Added: external/zlib/contrib/minizip/Makefile ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/Makefile Tue Jan 3 07:42:59 2006 @@ -0,0 +1,25 @@ +CC=cc +CFLAGS=-O -I../.. + +UNZ_OBJS = miniunz.o unzip.o ioapi.o ../../libz.a +ZIP_OBJS = minizip.o zip.o ioapi.o ../../libz.a + +.c.o: + $(CC) -c $(CFLAGS) $*.c + +all: miniunz minizip + +miniunz: $(UNZ_OBJS) + $(CC) $(CFLAGS) -o $@ $(UNZ_OBJS) + +minizip: $(ZIP_OBJS) + $(CC) $(CFLAGS) -o $@ $(ZIP_OBJS) + +test: miniunz minizip + ./minizip test readme.txt + ./miniunz -l test.zip + mv readme.txt readme.old + ./miniunz test.zip + +clean: + /bin/rm -f *.o *~ minizip miniunz Added: external/zlib/contrib/minizip/crypt.h ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/crypt.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,132 @@ +/* crypt.h -- base code for crypt/uncrypt ZIPfile + + + Version 1.01e, February 12th, 2005 + + Copyright (C) 1998-2005 Gilles Vollant + + This code is a modified version of crypting code in Infozip distribution + + The encryption/decryption parts of this source code (as opposed to the + non-echoing password parts) were originally written in Europe. 
The + whole source package can be freely distributed, including from the USA. + (Prior to January 2000, re-export from the US was a violation of US law.) + + This encryption code is a direct transcription of the algorithm from + Roger Schlafly, described by Phil Katz in the file appnote.txt. This + file (appnote.txt) is distributed with the PKZIP program (even in the + version without encryption capabilities). + + If you don't need crypting in your application, just define symbols + NOCRYPT and NOUNCRYPT. + + This code support the "Traditional PKWARE Encryption". + + The new AES encryption added on Zip format by Winzip (see the page + http://www.winzip.com/aes_info.htm ) and PKWare PKZip 5.x Strong + Encryption is not supported. +*/ + +#define CRC32(c, b) ((*(pcrc_32_tab+(((int)(c) ^ (b)) & 0xff))) ^ ((c) >> 8)) + +/*********************************************************************** + * Return the next byte in the pseudo-random sequence + */ +static int decrypt_byte(unsigned long* pkeys, const unsigned long* pcrc_32_tab) +{ + unsigned temp; /* POTENTIAL BUG: temp*(temp^1) may overflow in an + * unpredictable manner on 16-bit systems; not a problem + * with any known compiler so far, though */ + + temp = ((unsigned)(*(pkeys+2)) & 0xffff) | 2; + return (int)(((temp * (temp ^ 1)) >> 8) & 0xff); +} + +/*********************************************************************** + * Update the encryption keys with the next byte of plain text + */ +static int update_keys(unsigned long* pkeys,const unsigned long* pcrc_32_tab,int c) +{ + (*(pkeys+0)) = CRC32((*(pkeys+0)), c); + (*(pkeys+1)) += (*(pkeys+0)) & 0xff; + (*(pkeys+1)) = (*(pkeys+1)) * 134775813L + 1; + { + register int keyshift = (int)((*(pkeys+1)) >> 24); + (*(pkeys+2)) = CRC32((*(pkeys+2)), keyshift); + } + return c; +} + + +/*********************************************************************** + * Initialize the encryption keys and the random header according to + * the given password. 
+ */ +static void init_keys(const char* passwd,unsigned long* pkeys,const unsigned long* pcrc_32_tab) +{ + *(pkeys+0) = 305419896L; + *(pkeys+1) = 591751049L; + *(pkeys+2) = 878082192L; + while (*passwd != '\0') { + update_keys(pkeys,pcrc_32_tab,(int)*passwd); + passwd++; + } +} + +#define zdecode(pkeys,pcrc_32_tab,c) \ + (update_keys(pkeys,pcrc_32_tab,c ^= decrypt_byte(pkeys,pcrc_32_tab))) + +#define zencode(pkeys,pcrc_32_tab,c,t) \ + (t=decrypt_byte(pkeys,pcrc_32_tab), update_keys(pkeys,pcrc_32_tab,c), t^(c)) + +#ifdef INCLUDECRYPTINGCODE_IFCRYPTALLOWED + +#define RAND_HEAD_LEN 12 + /* "last resort" source for second part of crypt seed pattern */ +# ifndef ZCR_SEED2 +# define ZCR_SEED2 3141592654UL /* use PI as default pattern */ +# endif + +static int crypthead(passwd, buf, bufSize, pkeys, pcrc_32_tab, crcForCrypting) + const char *passwd; /* password string */ + unsigned char *buf; /* where to write header */ + int bufSize; + unsigned long* pkeys; + const unsigned long* pcrc_32_tab; + unsigned long crcForCrypting; +{ + int n; /* index in random header */ + int t; /* temporary */ + int c; /* random byte */ + unsigned char header[RAND_HEAD_LEN-2]; /* random header */ + static unsigned calls = 0; /* ensure different random header each time */ + + if (bufSize<RAND_HEAD_LEN) + return 0; + + /* First generate RAND_HEAD_LEN-2 random bytes. We encrypt the + * output of rand() to get less predictability, since rand() is + * often poorly implemented. + */ + if (++calls == 1) + { + srand((unsigned)(time(NULL) ^ ZCR_SEED2)); + } + init_keys(passwd, pkeys, pcrc_32_tab); + for (n = 0; n < RAND_HEAD_LEN-2; n++) + { + c = (rand() >> 7) & 0xff; + header[n] = (unsigned char)zencode(pkeys, pcrc_32_tab, c, t); + } + /* Encrypt random header (last two bytes is high word of crc) */ + init_keys(passwd, pkeys, pcrc_32_tab); + for (n = 0; n < RAND_HEAD_LEN-2; n++) + { + buf[n] = (unsigned char)zencode(pkeys, pcrc_32_tab, header[n], t); + } + buf[n++] = zencode(pkeys, pcrc_32_tab, (int)(crcForCrypting >> 16) & 0xff, t); + buf[n++] = zencode(pkeys, pcrc_32_tab, (int)(crcForCrypting >> 24) & 0xff, t); + return n; +} + +#endif Added: external/zlib/contrib/minizip/ioapi.c ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/ioapi.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,177 @@ 
+/* ioapi.c -- IO base function header for compress/uncompress .zip + files using zlib + zip or unzip API + + Version 1.01e, February 12th, 2005 + + Copyright (C) 1998-2005 Gilles Vollant +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "zlib.h" +#include "ioapi.h" + + + +/* I've found an old Unix (a SunOS 4.1.3_U1) without all SEEK_* defined.... */ + +#ifndef SEEK_CUR +#define SEEK_CUR 1 +#endif + +#ifndef SEEK_END +#define SEEK_END 2 +#endif + +#ifndef SEEK_SET +#define SEEK_SET 0 +#endif + +voidpf ZCALLBACK fopen_file_func OF(( + voidpf opaque, + const char* filename, + int mode)); + +uLong ZCALLBACK fread_file_func OF(( + voidpf opaque, + voidpf stream, + void* buf, + uLong size)); + +uLong ZCALLBACK fwrite_file_func OF(( + voidpf opaque, + voidpf stream, + const void* buf, + uLong size)); + +long ZCALLBACK ftell_file_func OF(( + voidpf opaque, + voidpf stream)); + +long ZCALLBACK fseek_file_func OF(( + voidpf opaque, + voidpf stream, + uLong offset, + int origin)); + +int ZCALLBACK fclose_file_func OF(( + voidpf opaque, + voidpf stream)); + +int ZCALLBACK ferror_file_func OF(( + voidpf opaque, + voidpf stream)); + + +voidpf ZCALLBACK fopen_file_func (opaque, filename, mode) + voidpf opaque; + const char* filename; + int mode; +{ + FILE* file = NULL; + const char* mode_fopen = NULL; + if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER)==ZLIB_FILEFUNC_MODE_READ) + mode_fopen = "rb"; + else + if (mode & ZLIB_FILEFUNC_MODE_EXISTING) + mode_fopen = "r+b"; + else + if (mode & ZLIB_FILEFUNC_MODE_CREATE) + mode_fopen = "wb"; + + if ((filename!=NULL) && (mode_fopen != NULL)) + file = fopen(filename, mode_fopen); + return file; +} + + +uLong ZCALLBACK fread_file_func (opaque, stream, buf, size) + voidpf opaque; + voidpf stream; + void* buf; + uLong size; +{ + uLong ret; + ret = (uLong)fread(buf, 1, (size_t)size, (FILE *)stream); + return ret; +} + + +uLong ZCALLBACK fwrite_file_func (opaque, stream, buf, size) + voidpf opaque; + voidpf stream; + const void* buf; + uLong 
size; +{ + uLong ret; + ret = (uLong)fwrite(buf, 1, (size_t)size, (FILE *)stream); + return ret; +} + +long ZCALLBACK ftell_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + long ret; + ret = ftell((FILE *)stream); + return ret; +} + +long ZCALLBACK fseek_file_func (opaque, stream, offset, origin) + voidpf opaque; + voidpf stream; + uLong offset; + int origin; +{ + int fseek_origin=0; + long ret; + switch (origin) + { + case ZLIB_FILEFUNC_SEEK_CUR : + fseek_origin = SEEK_CUR; + break; + case ZLIB_FILEFUNC_SEEK_END : + fseek_origin = SEEK_END; + break; + case ZLIB_FILEFUNC_SEEK_SET : + fseek_origin = SEEK_SET; + break; + default: return -1; + } + ret = 0; + fseek((FILE *)stream, offset, fseek_origin); + return ret; +} + +int ZCALLBACK fclose_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + int ret; + ret = fclose((FILE *)stream); + return ret; +} + +int ZCALLBACK ferror_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + int ret; + ret = ferror((FILE *)stream); + return ret; +} + +void fill_fopen_filefunc (pzlib_filefunc_def) + zlib_filefunc_def* pzlib_filefunc_def; +{ + pzlib_filefunc_def->zopen_file = fopen_file_func; + pzlib_filefunc_def->zread_file = fread_file_func; + pzlib_filefunc_def->zwrite_file = fwrite_file_func; + pzlib_filefunc_def->ztell_file = ftell_file_func; + pzlib_filefunc_def->zseek_file = fseek_file_func; + pzlib_filefunc_def->zclose_file = fclose_file_func; + pzlib_filefunc_def->zerror_file = ferror_file_func; + pzlib_filefunc_def->opaque = NULL; +} Added: external/zlib/contrib/minizip/ioapi.h ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/ioapi.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,75 @@ +/* ioapi.h -- IO base function header for compress/uncompress .zip + files using zlib + zip or unzip API + + Version 1.01e, February 12th, 2005 + + Copyright (C) 1998-2005 Gilles Vollant +*/ + +#ifndef _ZLIBIOAPI_H +#define 
_ZLIBIOAPI_H + + +#define ZLIB_FILEFUNC_SEEK_CUR (1) +#define ZLIB_FILEFUNC_SEEK_END (2) +#define ZLIB_FILEFUNC_SEEK_SET (0) + +#define ZLIB_FILEFUNC_MODE_READ (1) +#define ZLIB_FILEFUNC_MODE_WRITE (2) +#define ZLIB_FILEFUNC_MODE_READWRITEFILTER (3) + +#define ZLIB_FILEFUNC_MODE_EXISTING (4) +#define ZLIB_FILEFUNC_MODE_CREATE (8) + + +#ifndef ZCALLBACK + +#if (defined(WIN32) || defined (WINDOWS) || defined (_WINDOWS)) && defined(CALLBACK) && defined (USEWINDOWS_CALLBACK) +#define ZCALLBACK CALLBACK +#else +#define ZCALLBACK +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef voidpf (ZCALLBACK *open_file_func) OF((voidpf opaque, const char* filename, int mode)); +typedef uLong (ZCALLBACK *read_file_func) OF((voidpf opaque, voidpf stream, void* buf, uLong size)); +typedef uLong (ZCALLBACK *write_file_func) OF((voidpf opaque, voidpf stream, const void* buf, uLong size)); +typedef long (ZCALLBACK *tell_file_func) OF((voidpf opaque, voidpf stream)); +typedef long (ZCALLBACK *seek_file_func) OF((voidpf opaque, voidpf stream, uLong offset, int origin)); +typedef int (ZCALLBACK *close_file_func) OF((voidpf opaque, voidpf stream)); +typedef int (ZCALLBACK *testerror_file_func) OF((voidpf opaque, voidpf stream)); + +typedef struct zlib_filefunc_def_s +{ + open_file_func zopen_file; + read_file_func zread_file; + write_file_func zwrite_file; + tell_file_func ztell_file; + seek_file_func zseek_file; + close_file_func zclose_file; + testerror_file_func zerror_file; + voidpf opaque; +} zlib_filefunc_def; + + + +void fill_fopen_filefunc OF((zlib_filefunc_def* pzlib_filefunc_def)); + +#define ZREAD(filefunc,filestream,buf,size) ((*((filefunc).zread_file))((filefunc).opaque,filestream,buf,size)) +#define ZWRITE(filefunc,filestream,buf,size) ((*((filefunc).zwrite_file))((filefunc).opaque,filestream,buf,size)) +#define ZTELL(filefunc,filestream) ((*((filefunc).ztell_file))((filefunc).opaque,filestream)) +#define ZSEEK(filefunc,filestream,pos,mode) 
((*((filefunc).zseek_file))((filefunc).opaque,filestream,pos,mode)) +#define ZCLOSE(filefunc,filestream) ((*((filefunc).zclose_file))((filefunc).opaque,filestream)) +#define ZERROR(filefunc,filestream) ((*((filefunc).zerror_file))((filefunc).opaque,filestream)) + + +#ifdef __cplusplus +} +#endif + +#endif + Added: external/zlib/contrib/minizip/iowin32.c ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/iowin32.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,270 @@ +/* iowin32.c -- IO base function header for compress/uncompress .zip + files using zlib + zip or unzip API + This IO API version uses the Win32 API (for Microsoft Windows) + + Version 1.01e, February 12th, 2005 + + Copyright (C) 1998-2005 Gilles Vollant +*/ + +#include + +#include "zlib.h" +#include "ioapi.h" +#include "iowin32.h" + +#ifndef INVALID_HANDLE_VALUE +#define INVALID_HANDLE_VALUE (0xFFFFFFFF) +#endif + +#ifndef INVALID_SET_FILE_POINTER +#define INVALID_SET_FILE_POINTER ((DWORD)-1) +#endif + +voidpf ZCALLBACK win32_open_file_func OF(( + voidpf opaque, + const char* filename, + int mode)); + +uLong ZCALLBACK win32_read_file_func OF(( + voidpf opaque, + voidpf stream, + void* buf, + uLong size)); + +uLong ZCALLBACK win32_write_file_func OF(( + voidpf opaque, + voidpf stream, + const void* buf, + uLong size)); + +long ZCALLBACK win32_tell_file_func OF(( + voidpf opaque, + voidpf stream)); + +long ZCALLBACK win32_seek_file_func OF(( + voidpf opaque, + voidpf stream, + uLong offset, + int origin)); + +int ZCALLBACK win32_close_file_func OF(( + voidpf opaque, + voidpf stream)); + +int ZCALLBACK win32_error_file_func OF(( + voidpf opaque, + voidpf stream)); + +typedef struct +{ + HANDLE hf; + int error; +} WIN32FILE_IOWIN; + +voidpf ZCALLBACK win32_open_file_func (opaque, filename, mode) + voidpf opaque; + const char* filename; + int mode; +{ + const char* mode_fopen = NULL; + DWORD 
dwDesiredAccess,dwCreationDisposition,dwShareMode,dwFlagsAndAttributes ; + HANDLE hFile = 0; + voidpf ret=NULL; + + dwDesiredAccess = dwShareMode = dwFlagsAndAttributes = 0; + + if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER)==ZLIB_FILEFUNC_MODE_READ) + { + dwDesiredAccess = GENERIC_READ; + dwCreationDisposition = OPEN_EXISTING; + dwShareMode = FILE_SHARE_READ; + } + else + if (mode & ZLIB_FILEFUNC_MODE_EXISTING) + { + dwDesiredAccess = GENERIC_WRITE | GENERIC_READ; + dwCreationDisposition = OPEN_EXISTING; + } + else + if (mode & ZLIB_FILEFUNC_MODE_CREATE) + { + dwDesiredAccess = GENERIC_WRITE | GENERIC_READ; + dwCreationDisposition = CREATE_ALWAYS; + } + + if ((filename!=NULL) && (dwDesiredAccess != 0)) + hFile = CreateFile((LPCTSTR)filename, dwDesiredAccess, dwShareMode, NULL, + dwCreationDisposition, dwFlagsAndAttributes, NULL); + + if (hFile == INVALID_HANDLE_VALUE) + hFile = NULL; + + if (hFile != NULL) + { + WIN32FILE_IOWIN w32fiow; + w32fiow.hf = hFile; + w32fiow.error = 0; + ret = malloc(sizeof(WIN32FILE_IOWIN)); + if (ret==NULL) + CloseHandle(hFile); + else *((WIN32FILE_IOWIN*)ret) = w32fiow; + } + return ret; +} + + +uLong ZCALLBACK win32_read_file_func (opaque, stream, buf, size) + voidpf opaque; + voidpf stream; + void* buf; + uLong size; +{ + uLong ret=0; + HANDLE hFile = NULL; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + if (hFile != NULL) + if (!ReadFile(hFile, buf, size, &ret, NULL)) + { + DWORD dwErr = GetLastError(); + if (dwErr == ERROR_HANDLE_EOF) + dwErr = 0; + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + } + + return ret; +} + + +uLong ZCALLBACK win32_write_file_func (opaque, stream, buf, size) + voidpf opaque; + voidpf stream; + const void* buf; + uLong size; +{ + uLong ret=0; + HANDLE hFile = NULL; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + + if (hFile !=NULL) + if (!WriteFile(hFile, buf, size, &ret, NULL)) + { + DWORD dwErr = GetLastError(); + if (dwErr == ERROR_HANDLE_EOF) + dwErr = 0; + 
((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + } + + return ret; +} + +long ZCALLBACK win32_tell_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + long ret=-1; + HANDLE hFile = NULL; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + if (hFile != NULL) + { + DWORD dwSet = SetFilePointer(hFile, 0, NULL, FILE_CURRENT); + if (dwSet == INVALID_SET_FILE_POINTER) + { + DWORD dwErr = GetLastError(); + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + ret = -1; + } + else + ret=(long)dwSet; + } + return ret; +} + +long ZCALLBACK win32_seek_file_func (opaque, stream, offset, origin) + voidpf opaque; + voidpf stream; + uLong offset; + int origin; +{ + DWORD dwMoveMethod=0xFFFFFFFF; + HANDLE hFile = NULL; + + long ret=-1; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + switch (origin) + { + case ZLIB_FILEFUNC_SEEK_CUR : + dwMoveMethod = FILE_CURRENT; + break; + case ZLIB_FILEFUNC_SEEK_END : + dwMoveMethod = FILE_END; + break; + case ZLIB_FILEFUNC_SEEK_SET : + dwMoveMethod = FILE_BEGIN; + break; + default: return -1; + } + + if (hFile != NULL) + { + DWORD dwSet = SetFilePointer(hFile, offset, NULL, dwMoveMethod); + if (dwSet == INVALID_SET_FILE_POINTER) + { + DWORD dwErr = GetLastError(); + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + ret = -1; + } + else + ret=0; + } + return ret; +} + +int ZCALLBACK win32_close_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + int ret=-1; + + if (stream!=NULL) + { + HANDLE hFile; + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + if (hFile != NULL) + { + CloseHandle(hFile); + ret=0; + } + free(stream); + } + return ret; +} + +int ZCALLBACK win32_error_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + int ret=-1; + if (stream!=NULL) + { + ret = ((WIN32FILE_IOWIN*)stream) -> error; + } + return ret; +} + +void fill_win32_filefunc (pzlib_filefunc_def) + zlib_filefunc_def* pzlib_filefunc_def; +{ + pzlib_filefunc_def->zopen_file = win32_open_file_func; + 
pzlib_filefunc_def->zread_file = win32_read_file_func; + pzlib_filefunc_def->zwrite_file = win32_write_file_func; + pzlib_filefunc_def->ztell_file = win32_tell_file_func; + pzlib_filefunc_def->zseek_file = win32_seek_file_func; + pzlib_filefunc_def->zclose_file = win32_close_file_func; + pzlib_filefunc_def->zerror_file = win32_error_file_func; + pzlib_filefunc_def->opaque=NULL; +} Added: external/zlib/contrib/minizip/iowin32.h ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/iowin32.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,21 @@ +/* iowin32.h -- IO base function header for compress/uncompress .zip + files using zlib + zip or unzip API + This IO API version uses the Win32 API (for Microsoft Windows) + + Version 1.01e, February 12th, 2005 + + Copyright (C) 1998-2005 Gilles Vollant +*/ + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + +void fill_win32_filefunc OF((zlib_filefunc_def* pzlib_filefunc_def)); + +#ifdef __cplusplus +} +#endif Added: external/zlib/contrib/minizip/miniunz.c ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/miniunz.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,585 @@ +/* + miniunz.c + Version 1.01e, February 12th, 2005 + + Copyright (C) 1998-2005 Gilles Vollant +*/ + + +#include +#include +#include +#include +#include +#include + +#ifdef unix +# include +# include +#else +# include +# include +#endif + +#include "unzip.h" + +#define CASESENSITIVITY (0) +#define WRITEBUFFERSIZE (8192) +#define MAXFILENAME (256) + +#ifdef WIN32 +#define USEWIN32IOAPI +#include "iowin32.h" +#endif +/* + mini unzip, demo of unzip package + + usage : + Usage : miniunz [-exvlo] file.zip [file_to_extract] [-d extractdir] + + list the file in the zipfile, and print the content of FILE_ID.ZIP or README.TXT + if it exists +*/ + + +/* change_file_date : change the date/time of a file + filename : the filename of the 
file where date/time must be modified + dosdate : the new date at the MSDos format (4 bytes) + tmu_date : the SAME new date at the tm_unz format */ +void change_file_date(filename,dosdate,tmu_date) + const char *filename; + uLong dosdate; + tm_unz tmu_date; +{ +#ifdef WIN32 + HANDLE hFile; + FILETIME ftm,ftLocal,ftCreate,ftLastAcc,ftLastWrite; + + hFile = CreateFile(filename,GENERIC_READ | GENERIC_WRITE, + 0,NULL,OPEN_EXISTING,0,NULL); + GetFileTime(hFile,&ftCreate,&ftLastAcc,&ftLastWrite); + DosDateTimeToFileTime((WORD)(dosdate>>16),(WORD)dosdate,&ftLocal); + LocalFileTimeToFileTime(&ftLocal,&ftm); + SetFileTime(hFile,&ftm,&ftLastAcc,&ftm); + CloseHandle(hFile); +#else +#ifdef unix + struct utimbuf ut; + struct tm newdate; + newdate.tm_sec = tmu_date.tm_sec; + newdate.tm_min=tmu_date.tm_min; + newdate.tm_hour=tmu_date.tm_hour; + newdate.tm_mday=tmu_date.tm_mday; + newdate.tm_mon=tmu_date.tm_mon; + if (tmu_date.tm_year > 1900) + newdate.tm_year=tmu_date.tm_year - 1900; + else + newdate.tm_year=tmu_date.tm_year ; + newdate.tm_isdst=-1; + + ut.actime=ut.modtime=mktime(&newdate); + utime(filename,&ut); +#endif +#endif +} + + +/* mymkdir and change_file_date are not 100 % portable + As I don't know well Unix, I wait feedback for the unix portion */ + +int mymkdir(dirname) + const char* dirname; +{ + int ret=0; +#ifdef WIN32 + ret = mkdir(dirname); +#else +#ifdef unix + ret = mkdir (dirname,0775); +#endif +#endif + return ret; +} + +int makedir (newdir) + char *newdir; +{ + char *buffer ; + char *p; + int len = (int)strlen(newdir); + + if (len <= 0) + return 0; + + buffer = (char*)malloc(len+1); + strcpy(buffer,newdir); + + if (buffer[len-1] == '/') { + buffer[len-1] = '\0'; + } + if (mymkdir(buffer) == 0) + { + free(buffer); + return 1; + } + + p = buffer+1; + while (1) + { + char hold; + + while(*p && *p != '\\' && *p != '/') + p++; + hold = *p; + *p = 0; + if ((mymkdir(buffer) == -1) && (errno == ENOENT)) + { + printf("couldn't create directory %s\n",buffer); + 
free(buffer); + return 0; + } + if (hold == 0) + break; + *p++ = hold; + } + free(buffer); + return 1; +} + +void do_banner() +{ + printf("MiniUnz 1.01b, demo of zLib + Unz package written by Gilles Vollant\n"); + printf("more info at http://www.winimage.com/zLibDll/unzip.html\n\n"); +} + +void do_help() +{ + printf("Usage : miniunz [-e] [-x] [-v] [-l] [-o] [-p password] file.zip [file_to_extr.] [-d extractdir]\n\n" \ + " -e Extract without pathname (junk paths)\n" \ + " -x Extract with pathname\n" \ + " -v list files\n" \ + " -l list files\n" \ + " -d directory to extract into\n" \ + " -o overwrite files without prompting\n" \ + " -p extract crypted file using password\n\n"); +} + + +int do_list(uf) + unzFile uf; +{ + uLong i; + unz_global_info gi; + int err; + + err = unzGetGlobalInfo (uf,&gi); + if (err!=UNZ_OK) + printf("error %d with zipfile in unzGetGlobalInfo \n",err); + printf(" Length Method Size Ratio Date Time CRC-32 Name\n"); + printf(" ------ ------ ---- ----- ---- ---- ------ ----\n"); + for (i=0;i0) + ratio = (file_info.compressed_size*100)/file_info.uncompressed_size; + + /* display a '*' if the file is crypted */ + if ((file_info.flag & 1) != 0) + charCrypt='*'; + + if (file_info.compression_method==0) + string_method="Stored"; + else + if (file_info.compression_method==Z_DEFLATED) + { + uInt iLevel=(uInt)((file_info.flag & 0x6)/2); + if (iLevel==0) + string_method="Defl:N"; + else if (iLevel==1) + string_method="Defl:X"; + else if ((iLevel==2) || (iLevel==3)) + string_method="Defl:F"; /* 2:fast , 3 : extra fast*/ + } + else + string_method="Unkn. 
"; + + printf("%7lu %6s%c%7lu %3lu%% %2.2lu-%2.2lu-%2.2lu %2.2lu:%2.2lu %8.8lx %s\n", + file_info.uncompressed_size,string_method, + charCrypt, + file_info.compressed_size, + ratio, + (uLong)file_info.tmu_date.tm_mon + 1, + (uLong)file_info.tmu_date.tm_mday, + (uLong)file_info.tmu_date.tm_year % 100, + (uLong)file_info.tmu_date.tm_hour,(uLong)file_info.tmu_date.tm_min, + (uLong)file_info.crc,filename_inzip); + if ((i+1)='a') && (rep<='z')) + rep -= 0x20; + } + while ((rep!='Y') && (rep!='N') && (rep!='A')); + } + + if (rep == 'N') + skip = 1; + + if (rep == 'A') + *popt_overwrite=1; + } + + if ((skip==0) && (err==UNZ_OK)) + { + fout=fopen(write_filename,"wb"); + + /* some zipfile don't contain directory alone before file */ + if ((fout==NULL) && ((*popt_extract_without_path)==0) && + (filename_withoutpath!=(char*)filename_inzip)) + { + char c=*(filename_withoutpath-1); + *(filename_withoutpath-1)='\0'; + makedir(write_filename); + *(filename_withoutpath-1)=c; + fout=fopen(write_filename,"wb"); + } + + if (fout==NULL) + { + printf("error opening %s\n",write_filename); + } + } + + if (fout!=NULL) + { + printf(" extracting: %s\n",write_filename); + + do + { + err = unzReadCurrentFile(uf,buf,size_buf); + if (err<0) + { + printf("error %d with zipfile in unzReadCurrentFile\n",err); + break; + } + if (err>0) + if (fwrite(buf,err,1,fout)!=1) + { + printf("error in writing extracted file\n"); + err=UNZ_ERRNO; + break; + } + } + while (err>0); + if (fout) + fclose(fout); + + if (err==0) + change_file_date(write_filename,file_info.dosDate, + file_info.tmu_date); + } + + if (err==UNZ_OK) + { + err = unzCloseCurrentFile (uf); + if (err!=UNZ_OK) + { + printf("error %d with zipfile in unzCloseCurrentFile\n",err); + } + } + else + unzCloseCurrentFile(uf); /* don't lose the error */ + } + + free(buf); + return err; +} + + +int do_extract(uf,opt_extract_without_path,opt_overwrite,password) + unzFile uf; + int opt_extract_without_path; + int opt_overwrite; + const char* password; +{ 
+ uLong i; + unz_global_info gi; + int err; + FILE* fout=NULL; + + err = unzGetGlobalInfo (uf,&gi); + if (err!=UNZ_OK) + printf("error %d with zipfile in unzGetGlobalInfo \n",err); + + for (i=0;i +#include +#include +#include +#include +#include + +#ifdef unix +# include +# include +# include +# include +#else +# include +# include +#endif + +#include "zip.h" + +#ifdef WIN32 +#define USEWIN32IOAPI +#include "iowin32.h" +#endif + + + +#define WRITEBUFFERSIZE (16384) +#define MAXFILENAME (256) + +#ifdef WIN32 +uLong filetime(f, tmzip, dt) + char *f; /* name of file to get info on */ + tm_zip *tmzip; /* return value: access, modific. and creation times */ + uLong *dt; /* dostime */ +{ + int ret = 0; + { + FILETIME ftLocal; + HANDLE hFind; + WIN32_FIND_DATA ff32; + + hFind = FindFirstFile(f,&ff32); + if (hFind != INVALID_HANDLE_VALUE) + { + FileTimeToLocalFileTime(&(ff32.ftLastWriteTime),&ftLocal); + FileTimeToDosDateTime(&ftLocal,((LPWORD)dt)+1,((LPWORD)dt)+0); + FindClose(hFind); + ret = 1; + } + } + return ret; +} +#else +#ifdef unix +uLong filetime(f, tmzip, dt) + char *f; /* name of file to get info on */ + tm_zip *tmzip; /* return value: access, modific. and creation times */ + uLong *dt; /* dostime */ +{ + int ret=0; + struct stat s; /* results of stat() */ + struct tm* filedate; + time_t tm_t=0; + + if (strcmp(f,"-")!=0) + { + char name[MAXFILENAME+1]; + int len = strlen(f); + if (len > MAXFILENAME) + len = MAXFILENAME; + + strncpy(name, f,MAXFILENAME-1); + /* strncpy doesnt append the trailing NULL, of the string is too long. 
*/ + name[ MAXFILENAME ] = '\0'; + + if (name[len - 1] == '/') + name[len - 1] = '\0'; + /* not all systems allow stat'ing a file with / appended */ + if (stat(name,&s)==0) + { + tm_t = s.st_mtime; + ret = 1; + } + } + filedate = localtime(&tm_t); + + tmzip->tm_sec = filedate->tm_sec; + tmzip->tm_min = filedate->tm_min; + tmzip->tm_hour = filedate->tm_hour; + tmzip->tm_mday = filedate->tm_mday; + tmzip->tm_mon = filedate->tm_mon ; + tmzip->tm_year = filedate->tm_year; + + return ret; +} +#else +uLong filetime(f, tmzip, dt) + char *f; /* name of file to get info on */ + tm_zip *tmzip; /* return value: access, modific. and creation times */ + uLong *dt; /* dostime */ +{ + return 0; +} +#endif +#endif + + + + +int check_exist_file(filename) + const char* filename; +{ + FILE* ftestexist; + int ret = 1; + ftestexist = fopen(filename,"rb"); + if (ftestexist==NULL) + ret = 0; + else + fclose(ftestexist); + return ret; +} + +void do_banner() +{ + printf("MiniZip 1.01b, demo of zLib + Zip package written by Gilles Vollant\n"); + printf("more info at http://www.winimage.com/zLibDll/unzip.html\n\n"); +} + +void do_help() +{ + printf("Usage : minizip [-o] [-a] [-0 to -9] [-p password] file.zip [files_to_add]\n\n" \ + " -o Overwrite existing file.zip\n" \ + " -a Append to existing file.zip\n" \ + " -0 Store only\n" \ + " -1 Compress faster\n" \ + " -9 Compress better\n\n"); +} + +/* calculate the CRC32 of a file, + because to encrypt a file, we need known the CRC32 of the file before */ +int getFileCrc(const char* filenameinzip,void*buf,unsigned long size_buf,unsigned long* result_crc) +{ + unsigned long calculate_crc=0; + int err=ZIP_OK; + FILE * fin = fopen(filenameinzip,"rb"); + unsigned long size_read = 0; + unsigned long total_read = 0; + if (fin==NULL) + { + err = ZIP_ERRNO; + } + + if (err == ZIP_OK) + do + { + err = ZIP_OK; + size_read = (int)fread(buf,1,size_buf,fin); + if (size_read < size_buf) + if (feof(fin)==0) + { + printf("error in reading %s\n",filenameinzip); + 
err = ZIP_ERRNO; + } + + if (size_read>0) + calculate_crc = crc32(calculate_crc,buf,size_read); + total_read += size_read; + + } while ((err == ZIP_OK) && (size_read>0)); + + if (fin) + fclose(fin); + + *result_crc=calculate_crc; + printf("file %s crc %x\n",filenameinzip,calculate_crc); + return err; +} + +int main(argc,argv) + int argc; + char *argv[]; +{ + int i; + int opt_overwrite=0; + int opt_compress_level=Z_DEFAULT_COMPRESSION; + int zipfilenamearg = 0; + char filename_try[MAXFILENAME+16]; + int zipok; + int err=0; + int size_buf=0; + void* buf=NULL; + const char* password=NULL; + + + do_banner(); + if (argc==1) + { + do_help(); + return 0; + } + else + { + for (i=1;i='0') && (c<='9')) + opt_compress_level = c-'0'; + + if (((c=='p') || (c=='P')) && (i+1='a') && (rep<='z')) + rep -= 0x20; + } + while ((rep!='Y') && (rep!='N') && (rep!='A')); + if (rep=='N') + zipok = 0; + if (rep=='A') + opt_overwrite = 2; + } + } + + if (zipok==1) + { + zipFile zf; + int errclose; +# ifdef USEWIN32IOAPI + zlib_filefunc_def ffunc; + fill_win32_filefunc(&ffunc); + zf = zipOpen2(filename_try,(opt_overwrite==2) ? 2 : 0,NULL,&ffunc); +# else + zf = zipOpen(filename_try,(opt_overwrite==2) ? 2 : 0); +# endif + + if (zf == NULL) + { + printf("error opening %s\n",filename_try); + err= ZIP_ERRNO; + } + else + printf("creating %s\n",filename_try); + + for (i=zipfilenamearg+1;(i='0') || (argv[i][1]<='9'))) && + (strlen(argv[i]) == 2))) + { + FILE * fin; + int size_read; + const char* filenameinzip = argv[i]; + zip_fileinfo zi; + unsigned long crcFile=0; + + zi.tmz_date.tm_sec = zi.tmz_date.tm_min = zi.tmz_date.tm_hour = + zi.tmz_date.tm_mday = zi.tmz_date.tm_mon = zi.tmz_date.tm_year = 0; + zi.dosDate = 0; + zi.internal_fa = 0; + zi.external_fa = 0; + filetime(filenameinzip,&zi.tmz_date,&zi.dosDate); + +/* + err = zipOpenNewFileInZip(zf,filenameinzip,&zi, + NULL,0,NULL,0,NULL / * comment * /, + (opt_compress_level != 0) ? 
Z_DEFLATED : 0, + opt_compress_level); +*/ + if ((password != NULL) && (err==ZIP_OK)) + err = getFileCrc(filenameinzip,buf,size_buf,&crcFile); + + err = zipOpenNewFileInZip3(zf,filenameinzip,&zi, + NULL,0,NULL,0,NULL /* comment*/, + (opt_compress_level != 0) ? Z_DEFLATED : 0, + opt_compress_level,0, + /* -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, */ + -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, + password,crcFile); + + if (err != ZIP_OK) + printf("error in opening %s in zipfile\n",filenameinzip); + else + { + fin = fopen(filenameinzip,"rb"); + if (fin==NULL) + { + err=ZIP_ERRNO; + printf("error in opening %s for reading\n",filenameinzip); + } + } + + if (err == ZIP_OK) + do + { + err = ZIP_OK; + size_read = (int)fread(buf,1,size_buf,fin); + if (size_read < size_buf) + if (feof(fin)==0) + { + printf("error in reading %s\n",filenameinzip); + err = ZIP_ERRNO; + } + + if (size_read>0) + { + err = zipWriteInFileInZip (zf,buf,size_read); + if (err<0) + { + printf("error in writing %s in the zipfile\n", + filenameinzip); + } + + } + } while ((err == ZIP_OK) && (size_read>0)); + + if (fin) + fclose(fin); + + if (err<0) + err=ZIP_ERRNO; + else + { + err = zipCloseFileInZip(zf); + if (err!=ZIP_OK) + printf("error in closing %s in the zipfile\n", + filenameinzip); + } + } + } + errclose = zipClose(zf,NULL); + if (errclose != ZIP_OK) + printf("error in closing %s\n",filename_try); + } + else + { + do_help(); + } + + free(buf); + return 0; +} Added: external/zlib/contrib/minizip/mztools.c ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/mztools.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,281 @@ +/* + Additional tools for Minizip + Code: Xavier Roche '2004 + License: Same as ZLIB (www.gzip.org) +*/ + +/* Code */ +#include +#include +#include +#include "zlib.h" +#include "unzip.h" + +#define READ_8(adr) ((unsigned char)*(adr)) +#define READ_16(adr) ( READ_8(adr) | (READ_8(adr+1) << 8) ) +#define 
READ_32(adr) ( READ_16(adr) | (READ_16((adr)+2) << 16) ) + +#define WRITE_8(buff, n) do { \ + *((unsigned char*)(buff)) = (unsigned char) ((n) & 0xff); \ +} while(0) +#define WRITE_16(buff, n) do { \ + WRITE_8((unsigned char*)(buff), n); \ + WRITE_8(((unsigned char*)(buff)) + 1, (n) >> 8); \ +} while(0) +#define WRITE_32(buff, n) do { \ + WRITE_16((unsigned char*)(buff), (n) & 0xffff); \ + WRITE_16((unsigned char*)(buff) + 2, (n) >> 16); \ +} while(0) + +extern int ZEXPORT unzRepair(file, fileOut, fileOutTmp, nRecovered, bytesRecovered) +const char* file; +const char* fileOut; +const char* fileOutTmp; +uLong* nRecovered; +uLong* bytesRecovered; +{ + int err = Z_OK; + FILE* fpZip = fopen(file, "rb"); + FILE* fpOut = fopen(fileOut, "wb"); + FILE* fpOutCD = fopen(fileOutTmp, "wb"); + if (fpZip != NULL && fpOut != NULL) { + int entries = 0; + uLong totalBytes = 0; + char header[30]; + char filename[256]; + char extra[1024]; + int offset = 0; + int offsetCD = 0; + while ( fread(header, 1, 30, fpZip) == 30 ) { + int currentOffset = offset; + + /* File entry */ + if (READ_32(header) == 0x04034b50) { + unsigned int version = READ_16(header + 4); + unsigned int gpflag = READ_16(header + 6); + unsigned int method = READ_16(header + 8); + unsigned int filetime = READ_16(header + 10); + unsigned int filedate = READ_16(header + 12); + unsigned int crc = READ_32(header + 14); /* crc */ + unsigned int cpsize = READ_32(header + 18); /* compressed size */ + unsigned int uncpsize = READ_32(header + 22); /* uncompressed sz */ + unsigned int fnsize = READ_16(header + 26); /* file name length */ + unsigned int extsize = READ_16(header + 28); /* extra field length */ + filename[0] = extra[0] = '\0'; + + /* Header */ + if (fwrite(header, 1, 30, fpOut) == 30) { + offset += 30; + } else { + err = Z_ERRNO; + break; + } + + /* Filename */ + if (fnsize > 0) { + if (fread(filename, 1, fnsize, fpZip) == fnsize) { + if (fwrite(filename, 1, fnsize, fpOut) == fnsize) { + offset += fnsize; + } else 
{ + err = Z_ERRNO; + break; + } + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_STREAM_ERROR; + break; + } + + /* Extra field */ + if (extsize > 0) { + if (fread(extra, 1, extsize, fpZip) == extsize) { + if (fwrite(extra, 1, extsize, fpOut) == extsize) { + offset += extsize; + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_ERRNO; + break; + } + } + + /* Data */ + { + int dataSize = cpsize; + if (dataSize == 0) { + dataSize = uncpsize; + } + if (dataSize > 0) { + char* data = malloc(dataSize); + if (data != NULL) { + if ((int)fread(data, 1, dataSize, fpZip) == dataSize) { + if ((int)fwrite(data, 1, dataSize, fpOut) == dataSize) { + offset += dataSize; + totalBytes += dataSize; + } else { + err = Z_ERRNO; + } + } else { + err = Z_ERRNO; + } + free(data); + if (err != Z_OK) { + break; + } + } else { + err = Z_MEM_ERROR; + break; + } + } + } + + /* Central directory entry */ + { + char header[46]; + char* comment = ""; + int comsize = (int) strlen(comment); + WRITE_32(header, 0x02014b50); + WRITE_16(header + 4, version); + WRITE_16(header + 6, version); + WRITE_16(header + 8, gpflag); + WRITE_16(header + 10, method); + WRITE_16(header + 12, filetime); + WRITE_16(header + 14, filedate); + WRITE_32(header + 16, crc); + WRITE_32(header + 20, cpsize); + WRITE_32(header + 24, uncpsize); + WRITE_16(header + 28, fnsize); + WRITE_16(header + 30, extsize); + WRITE_16(header + 32, comsize); + WRITE_16(header + 34, 0); /* disk # */ + WRITE_16(header + 36, 0); /* int attrb */ + WRITE_32(header + 38, 0); /* ext attrb */ + WRITE_32(header + 42, currentOffset); + /* Header */ + if (fwrite(header, 1, 46, fpOutCD) == 46) { + offsetCD += 46; + + /* Filename */ + if (fnsize > 0) { + if (fwrite(filename, 1, fnsize, fpOutCD) == fnsize) { + offsetCD += fnsize; + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_STREAM_ERROR; + break; + } + + /* Extra field */ + if (extsize > 0) { + if (fwrite(extra, 1, extsize, fpOutCD) == extsize) { + offsetCD += 
extsize; + } else { + err = Z_ERRNO; + break; + } + } + + /* Comment field */ + if (comsize > 0) { + if ((int)fwrite(comment, 1, comsize, fpOutCD) == comsize) { + offsetCD += comsize; + } else { + err = Z_ERRNO; + break; + } + } + + + } else { + err = Z_ERRNO; + break; + } + } + + /* Success */ + entries++; + + } else { + break; + } + } + + /* Final central directory */ + { + int entriesZip = entries; + char header[22]; + char* comment = ""; // "ZIP File recovered by zlib/minizip/mztools"; + int comsize = (int) strlen(comment); + if (entriesZip > 0xffff) { + entriesZip = 0xffff; + } + WRITE_32(header, 0x06054b50); + WRITE_16(header + 4, 0); /* disk # */ + WRITE_16(header + 6, 0); /* disk # */ + WRITE_16(header + 8, entriesZip); /* hack */ + WRITE_16(header + 10, entriesZip); /* hack */ + WRITE_32(header + 12, offsetCD); /* size of CD */ + WRITE_32(header + 16, offset); /* offset to CD */ + WRITE_16(header + 20, comsize); /* comment */ + + /* Header */ + if (fwrite(header, 1, 22, fpOutCD) == 22) { + + /* Comment field */ + if (comsize > 0) { + if ((int)fwrite(comment, 1, comsize, fpOutCD) != comsize) { + err = Z_ERRNO; + } + } + + } else { + err = Z_ERRNO; + } + } + + /* Final merge (file + central directory) */ + fclose(fpOutCD); + if (err == Z_OK) { + fpOutCD = fopen(fileOutTmp, "rb"); + if (fpOutCD != NULL) { + int nRead; + char buffer[8192]; + while ( (nRead = (int)fread(buffer, 1, sizeof(buffer), fpOutCD)) > 0) { + if ((int)fwrite(buffer, 1, nRead, fpOut) != nRead) { + err = Z_ERRNO; + break; + } + } + fclose(fpOutCD); + } + } + + /* Close */ + fclose(fpZip); + fclose(fpOut); + + /* Wipe temporary file */ + (void)remove(fileOutTmp); + + /* Number of recovered entries */ + if (err == Z_OK) { + if (nRecovered != NULL) { + *nRecovered = entries; + } + if (bytesRecovered != NULL) { + *bytesRecovered = totalBytes; + } + } + } else { + err = Z_STREAM_ERROR; + } + return err; +} Added: external/zlib/contrib/minizip/mztools.h 
============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/mztools.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,31 @@ +/* + Additional tools for Minizip + Code: Xavier Roche '2004 + License: Same as ZLIB (www.gzip.org) +*/ + +#ifndef _zip_tools_H +#define _zip_tools_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ZLIB_H +#include "zlib.h" +#endif + +#include "unzip.h" + +/* Repair a ZIP file (missing central directory) + file: file to recover + fileOut: output file after recovery + fileOutTmp: temporary file name used for recovery +*/ +extern int ZEXPORT unzRepair(const char* file, + const char* fileOut, + const char* fileOutTmp, + uLong* nRecovered, + uLong* bytesRecovered); + +#endif Added: external/zlib/contrib/minizip/unzip.c ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/unzip.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,1598 @@ +/* unzip.c -- IO for uncompress .zip files using zlib + Version 1.01e, February 12th, 2005 + + Copyright (C) 1998-2005 Gilles Vollant + + Read unzip.h for more info +*/ + +/* Decryption code comes from crypt.c by Info-ZIP but has been greatly reduced in terms of +compatibility with older software. The following is from the original crypt.c. Code +woven in by Terry Thorsen 1/2003. +*/ +/* + Copyright (c) 1990-2000 Info-ZIP. All rights reserved. + + See the accompanying file LICENSE, version 2000-Apr-09 or later + (the contents of which are also included in zip.h) for terms of use. + If, for some reason, all these files are missing, the Info-ZIP license + also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html +*/ +/* + crypt.c (full version) by Info-ZIP. Last revised: [see crypt.h] + + The encryption/decryption parts of this source code (as opposed to the + non-echoing password parts) were originally written in Europe. 
The + whole source package can be freely distributed, including from the USA. + (Prior to January 2000, re-export from the US was a violation of US law.) + */ + +/* + This encryption code is a direct transcription of the algorithm from + Roger Schlafly, described by Phil Katz in the file appnote.txt. This + file (appnote.txt) is distributed with the PKZIP program (even in the + version without encryption capabilities). + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "zlib.h" +#include "unzip.h" + +#ifdef STDC +# include <stddef.h> +# include <string.h> +# include <stdlib.h> +#endif +#ifdef NO_ERRNO_H + extern int errno; +#else +# include <errno.h> +#endif + + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + + +#ifndef CASESENSITIVITYDEFAULT_NO +# if !defined(unix) && !defined(CASESENSITIVITYDEFAULT_YES) +# define CASESENSITIVITYDEFAULT_NO +# endif +#endif + + +#ifndef UNZ_BUFSIZE +#define UNZ_BUFSIZE (16384) +#endif + +#ifndef UNZ_MAXFILENAMEINZIP +#define UNZ_MAXFILENAMEINZIP (256) +#endif + +#ifndef ALLOC +# define ALLOC(size) (malloc(size)) +#endif +#ifndef TRYFREE +# define TRYFREE(p) {if (p) free(p);} +#endif + +#define SIZECENTRALDIRITEM (0x2e) +#define SIZEZIPLOCALHEADER (0x1e) + + + + +const char unz_copyright[] = + " unzip 1.01 Copyright 1998-2004 Gilles Vollant - http://www.winimage.com/zLibDll"; + +/* unz_file_info_internal contains internal info about a file in zipfile*/ +typedef struct unz_file_info_internal_s +{ + uLong offset_curfile;/* relative offset of local header 4 bytes */ +} unz_file_info_internal; + + +/* file_in_zip_read_info_s contains internal information about a file in zipfile, + when reading and decompressing it */ +typedef struct +{ + char *read_buffer; /* internal buffer for compressed data */ + z_stream stream; /* zLib stream structure for inflate */ + + uLong pos_in_zipfile; /* position in byte on the zipfile, for fseek*/ + uLong stream_initialised; /* flag set if stream structure is initialised*/ + + uLong
offset_local_extrafield;/* offset of the local extra field */ + uInt size_local_extrafield;/* size of the local extra field */ + uLong pos_local_extrafield; /* position in the local extra field in read*/ + + uLong crc32; /* crc32 of all data uncompressed */ + uLong crc32_wait; /* crc32 we must obtain after decompress all */ + uLong rest_read_compressed; /* number of byte to be decompressed */ + uLong rest_read_uncompressed;/*number of byte to be obtained after decomp*/ + zlib_filefunc_def z_filefunc; + voidpf filestream; /* io structure of the zipfile */ + uLong compression_method; /* compression method (0==store) */ + uLong byte_before_the_zipfile;/* byte before the zipfile, (>0 for sfx)*/ + int raw; /* nonzero: data is handed back as stored, without inflating */ +} file_in_zip_read_info_s; + + +/* unz_s contains internal information about the zipfile +*/ +typedef struct +{ + zlib_filefunc_def z_filefunc; + voidpf filestream; /* io structure of the zipfile */ + unz_global_info gi; /* public global information */ + uLong byte_before_the_zipfile;/* byte before the zipfile, (>0 for sfx)*/ + uLong num_file; /* number of the current file in the zipfile*/ + uLong pos_in_central_dir; /* pos of the current file in the central dir*/ + uLong current_file_ok; /* flag about the usability of the current file*/ + uLong central_pos; /* position of the beginning of the central dir*/ + + uLong size_central_dir; /* size of the central directory */ + uLong offset_central_dir; /* offset of start of central directory with + respect to the starting disk number */ + + unz_file_info cur_file_info; /* public info about the current file in zip*/ + unz_file_info_internal cur_file_info_internal; /* private info about it*/ + file_in_zip_read_info_s* pfile_in_zip_read; /* structure about the current + file if we are decompressing it */ + int encrypted; /* set to 1 once a file has been opened with a password */ +# ifndef NOUNCRYPT + unsigned long keys[3]; /* keys defining the pseudo-random sequence */ + const unsigned long* pcrc_32_tab; +# endif +} unz_s; + + +#ifndef NOUNCRYPT +#include "crypt.h" +#endif + +/*
=========================================================================== + Read a byte from a gz_stream; update next_in and avail_in. Return EOF + for end of file. + IN assertion: the stream s has been sucessfully opened for reading. +*/ + + +local int unzlocal_getByte OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + int *pi)); + +local int unzlocal_getByte(pzlib_filefunc_def,filestream,pi) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + int *pi; +{ + unsigned char c; + int err = (int)ZREAD(*pzlib_filefunc_def,filestream,&c,1); + if (err==1) + { + *pi = (int)c; + return UNZ_OK; + } + else + { + if (ZERROR(*pzlib_filefunc_def,filestream)) + return UNZ_ERRNO; + else + return UNZ_EOF; + } +} + + +/* =========================================================================== + Reads a long in LSB order from the given gz_stream. Sets +*/ +local int unzlocal_getShort OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + uLong *pX)); + +local int unzlocal_getShort (pzlib_filefunc_def,filestream,pX) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + uLong *pX; +{ + uLong x ; + int i; + int err; + + err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x = (uLong)i; + + if (err==UNZ_OK) + err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<8; + + if (err==UNZ_OK) + *pX = x; + else + *pX = 0; + return err; +} + +local int unzlocal_getLong OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + uLong *pX)); + +local int unzlocal_getLong (pzlib_filefunc_def,filestream,pX) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + uLong *pX; +{ + uLong x ; + int i; + int err; + + err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x = (uLong)i; + + if (err==UNZ_OK) + err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<8; + + if (err==UNZ_OK) + err = 
unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<16; + + if (err==UNZ_OK) + err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<24; + + if (err==UNZ_OK) + *pX = x; + else + *pX = 0; + return err; +} + + +/* My own strcmpi / strcasecmp */ +local int strcmpcasenosensitive_internal (fileName1,fileName2) + const char* fileName1; + const char* fileName2; +{ + for (;;) + { + char c1=*(fileName1++); + char c2=*(fileName2++); + if ((c1>='a') && (c1<='z')) + c1 -= 0x20; + if ((c2>='a') && (c2<='z')) + c2 -= 0x20; + if (c1=='\0') + return ((c2=='\0') ? 0 : -1); + if (c2=='\0') + return 1; + if (c1c2) + return 1; + } +} + + +#ifdef CASESENSITIVITYDEFAULT_NO +#define CASESENSITIVITYDEFAULTVALUE 2 +#else +#define CASESENSITIVITYDEFAULTVALUE 1 +#endif + +#ifndef STRCMPCASENOSENTIVEFUNCTION +#define STRCMPCASENOSENTIVEFUNCTION strcmpcasenosensitive_internal +#endif + +/* + Compare two filename (fileName1,fileName2). + If iCaseSenisivity = 1, comparision is case sensitivity (like strcmp) + If iCaseSenisivity = 2, comparision is not case sensitivity (like strcmpi + or strcasecmp) + If iCaseSenisivity = 0, case sensitivity is defaut of your operating system + (like 1 on Unix, 2 on Windows) + +*/ +extern int ZEXPORT unzStringFileNameCompare (fileName1,fileName2,iCaseSensitivity) + const char* fileName1; + const char* fileName2; + int iCaseSensitivity; +{ + if (iCaseSensitivity==0) + iCaseSensitivity=CASESENSITIVITYDEFAULTVALUE; + + if (iCaseSensitivity==1) + return strcmp(fileName1,fileName2); + + return STRCMPCASENOSENTIVEFUNCTION(fileName1,fileName2); +} + +#ifndef BUFREADCOMMENT +#define BUFREADCOMMENT (0x400) +#endif + +/* + Locate the Central directory of a zipfile (at the end, just before + the global comment) +*/ +local uLong unzlocal_SearchCentralDir OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream)); + +local uLong unzlocal_SearchCentralDir(pzlib_filefunc_def,filestream) + const zlib_filefunc_def* 
pzlib_filefunc_def; + voidpf filestream; +{ + unsigned char* buf; + uLong uSizeFile; + uLong uBackRead; + uLong uMaxBack=0xffff; /* maximum size of global comment */ + uLong uPosFound=0; + + if (ZSEEK(*pzlib_filefunc_def,filestream,0,ZLIB_FILEFUNC_SEEK_END) != 0) + return 0; + + + uSizeFile = ZTELL(*pzlib_filefunc_def,filestream); + + if (uMaxBack>uSizeFile) + uMaxBack = uSizeFile; + + buf = (unsigned char*)ALLOC(BUFREADCOMMENT+4); + if (buf==NULL) + return 0; + + uBackRead = 4; + while (uBackReaduMaxBack) + uBackRead = uMaxBack; + else + uBackRead+=BUFREADCOMMENT; + uReadPos = uSizeFile-uBackRead ; + + uReadSize = ((BUFREADCOMMENT+4) < (uSizeFile-uReadPos)) ? + (BUFREADCOMMENT+4) : (uSizeFile-uReadPos); + if (ZSEEK(*pzlib_filefunc_def,filestream,uReadPos,ZLIB_FILEFUNC_SEEK_SET)!=0) + break; + + if (ZREAD(*pzlib_filefunc_def,filestream,buf,uReadSize)!=uReadSize) + break; + + for (i=(int)uReadSize-3; (i--)>0;) + if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) && + ((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06)) + { + uPosFound = uReadPos+i; + break; + } + + if (uPosFound!=0) + break; + } + TRYFREE(buf); + return uPosFound; +} + +/* + Open a Zip file. path contain the full pathname (by example, + on a Windows NT computer "c:\\test\\zlib114.zip" or on an Unix computer + "zlib/zlib114.zip". + If the zipfile cannot be opened (file doesn't exist or in not valid), the + return value is NULL. + Else, the return value is a unzFile Handle, usable with other function + of this unzip package. 
+*/ +extern unzFile ZEXPORT unzOpen2 (path, pzlib_filefunc_def) + const char *path; + zlib_filefunc_def* pzlib_filefunc_def; +{ + unz_s us; + unz_s *s; + uLong central_pos,uL; + + uLong number_disk; /* number of the current disk, used for + spanning ZIP, unsupported, always 0*/ + uLong number_disk_with_CD; /* number of the disk with central dir, used + for spanning ZIP, unsupported, always 0*/ + uLong number_entry_CD; /* total number of entries in + the central dir + (same as number_entry on nospan) */ + + int err=UNZ_OK; + + if (unz_copyright[0]!=' ') + return NULL; + + if (pzlib_filefunc_def==NULL) + fill_fopen_filefunc(&us.z_filefunc); + else + us.z_filefunc = *pzlib_filefunc_def; + + us.filestream= (*(us.z_filefunc.zopen_file))(us.z_filefunc.opaque, + path, + ZLIB_FILEFUNC_MODE_READ | + ZLIB_FILEFUNC_MODE_EXISTING); + if (us.filestream==NULL) + return NULL; + + central_pos = unzlocal_SearchCentralDir(&us.z_filefunc,us.filestream); + if (central_pos==0) + err=UNZ_ERRNO; + + if (ZSEEK(us.z_filefunc, us.filestream, + central_pos,ZLIB_FILEFUNC_SEEK_SET)!=0) + err=UNZ_ERRNO; + + /* the signature, already checked */ + if (unzlocal_getLong(&us.z_filefunc, us.filestream,&uL)!=UNZ_OK) + err=UNZ_ERRNO; + + /* number of this disk */ + if (unzlocal_getShort(&us.z_filefunc, us.filestream,&number_disk)!=UNZ_OK) + err=UNZ_ERRNO; + + /* number of the disk with the start of the central directory */ + if (unzlocal_getShort(&us.z_filefunc, us.filestream,&number_disk_with_CD)!=UNZ_OK) + err=UNZ_ERRNO; + + /* total number of entries in the central dir on this disk */ + if (unzlocal_getShort(&us.z_filefunc, us.filestream,&us.gi.number_entry)!=UNZ_OK) + err=UNZ_ERRNO; + + /* total number of entries in the central dir */ + if (unzlocal_getShort(&us.z_filefunc, us.filestream,&number_entry_CD)!=UNZ_OK) + err=UNZ_ERRNO; + + if ((number_entry_CD!=us.gi.number_entry) || + (number_disk_with_CD!=0) || + (number_disk!=0)) + err=UNZ_BADZIPFILE; + + /* size of the central directory */ + if
(unzlocal_getLong(&us.z_filefunc, us.filestream,&us.size_central_dir)!=UNZ_OK) + err=UNZ_ERRNO; + + /* offset of start of central directory with respect to the + starting disk number */ + if (unzlocal_getLong(&us.z_filefunc, us.filestream,&us.offset_central_dir)!=UNZ_OK) + err=UNZ_ERRNO; + + /* zipfile comment length */ + if (unzlocal_getShort(&us.z_filefunc, us.filestream,&us.gi.size_comment)!=UNZ_OK) + err=UNZ_ERRNO; + + if ((central_pos<us.offset_central_dir+us.size_central_dir) && + (err==UNZ_OK)) + err=UNZ_BADZIPFILE; + + if (err!=UNZ_OK) + { + ZCLOSE(us.z_filefunc, us.filestream); + return NULL; + } + + us.byte_before_the_zipfile = central_pos - + (us.offset_central_dir+us.size_central_dir); + us.central_pos = central_pos; + us.pfile_in_zip_read = NULL; + us.encrypted = 0; + + + s=(unz_s*)ALLOC(sizeof(unz_s)); + if (s==NULL) + { + /* out of memory: do not leak the stream opened above */ + ZCLOSE(us.z_filefunc, us.filestream); + return NULL; + } + *s=us; + unzGoToFirstFile((unzFile)s); + return (unzFile)s; +} + + +extern unzFile ZEXPORT unzOpen (path) + const char *path; +{ + return unzOpen2(path, NULL); +} + +/* + Close a ZipFile opened with unzOpen or unzOpen2. + A current file still open for reading is closed first with unzCloseCurrentFile. + return UNZ_OK if there is no problem, UNZ_PARAMERROR if file is NULL. */ +extern int ZEXPORT unzClose (file) + unzFile file; +{ + unz_s* s; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + + if (s->pfile_in_zip_read!=NULL) + unzCloseCurrentFile(file); + + ZCLOSE(s->z_filefunc, s->filestream); + TRYFREE(s); + return UNZ_OK; +} + + +/* + Write info about the ZipFile in the *pglobal_info structure. + No preparation of the structure is needed + return UNZ_OK if there is no problem. */ +extern int ZEXPORT unzGetGlobalInfo (file,pglobal_info) + unzFile file; + unz_global_info *pglobal_info; +{ + unz_s* s; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + *pglobal_info=s->gi; + return UNZ_OK; +} + + +/* + Translate date/time from Dos format to tm_unz (more easily readable) +*/ +local void unzlocal_DosDateToTmuDate (ulDosDate, ptm) + uLong ulDosDate; + tm_unz* ptm; +{ + uLong uDate; + uDate = (uLong)(ulDosDate>>16); + ptm->tm_mday = (uInt)(uDate&0x1f) ; + ptm->tm_mon = (uInt)((((uDate)&0x1E0)/0x20)-1) ; + ptm->tm_year = (uInt)(((uDate&0x0FE00)/0x0200)+1980) ; + + ptm->tm_hour = (uInt) ((ulDosDate &0xF800)/0x800); + ptm->tm_min = (uInt) ((ulDosDate&0x7E0)/0x20) ; + ptm->tm_sec = (uInt) (2*(ulDosDate&0x1f)) ; +} + +/* + Get Info about the current file in the zipfile, with internal only info +*/ +local int unzlocal_GetCurrentFileInfoInternal OF((unzFile file, + unz_file_info *pfile_info, + unz_file_info_internal + *pfile_info_internal, + char *szFileName, + uLong fileNameBufferSize, + void *extraField, + uLong extraFieldBufferSize, + char *szComment, + uLong commentBufferSize)); + +local int unzlocal_GetCurrentFileInfoInternal (file, + pfile_info, + pfile_info_internal, + szFileName, fileNameBufferSize, + extraField,
extraFieldBufferSize, + szComment, commentBufferSize) + unzFile file; + unz_file_info *pfile_info; + unz_file_info_internal *pfile_info_internal; + char *szFileName; + uLong fileNameBufferSize; + void *extraField; + uLong extraFieldBufferSize; + char *szComment; + uLong commentBufferSize; +{ + unz_s* s; + unz_file_info file_info; + unz_file_info_internal file_info_internal; + int err=UNZ_OK; + uLong uMagic; + long lSeek=0; + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + if (ZSEEK(s->z_filefunc, s->filestream, + s->pos_in_central_dir+s->byte_before_the_zipfile, + ZLIB_FILEFUNC_SEEK_SET)!=0) + err=UNZ_ERRNO; + + + /* we check the magic */ + if (err==UNZ_OK) + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uMagic) != UNZ_OK) + err=UNZ_ERRNO; + else if (uMagic!=0x02014b50) + err=UNZ_BADZIPFILE; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.version) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.version_needed) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.flag) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.compression_method) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.dosDate) != UNZ_OK) + err=UNZ_ERRNO; + + unzlocal_DosDateToTmuDate(file_info.dosDate,&file_info.tmu_date); + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.crc) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.compressed_size) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.uncompressed_size) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.size_filename) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.size_file_extra) != UNZ_OK) + err=UNZ_ERRNO; + + if
(unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.size_file_comment) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.disk_num_start) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.internal_fa) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.external_fa) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info_internal.offset_curfile) != UNZ_OK) + err=UNZ_ERRNO; + + /* lSeek counts the bytes still to skip before the next variable-length field */ + lSeek+=file_info.size_filename; + if ((err==UNZ_OK) && (szFileName!=NULL)) + { + uLong uSizeRead ; + if (file_info.size_filename<fileNameBufferSize) + { + *(szFileName+file_info.size_filename)='\0'; + uSizeRead = file_info.size_filename; + } + else + uSizeRead = fileNameBufferSize; + + if ((file_info.size_filename>0) && (fileNameBufferSize>0)) + if (ZREAD(s->z_filefunc, s->filestream,szFileName,uSizeRead)!=uSizeRead) + err=UNZ_ERRNO; + lSeek -= uSizeRead; + } + + + if ((err==UNZ_OK) && (extraField!=NULL)) + { + uLong uSizeRead ; + if (file_info.size_file_extra<extraFieldBufferSize) + uSizeRead = file_info.size_file_extra; + else + uSizeRead = extraFieldBufferSize; + + if (lSeek!=0) + if (ZSEEK(s->z_filefunc, s->filestream,lSeek,ZLIB_FILEFUNC_SEEK_CUR)==0) + lSeek=0; + else + err=UNZ_ERRNO; + if ((file_info.size_file_extra>0) && (extraFieldBufferSize>0)) + if (ZREAD(s->z_filefunc, s->filestream,extraField,uSizeRead)!=uSizeRead) + err=UNZ_ERRNO; + lSeek += file_info.size_file_extra - uSizeRead; + } + else + lSeek+=file_info.size_file_extra; + + + if ((err==UNZ_OK) && (szComment!=NULL)) + { + uLong uSizeRead ; + if (file_info.size_file_comment<commentBufferSize) + { + *(szComment+file_info.size_file_comment)='\0'; + uSizeRead = file_info.size_file_comment; + } + else + uSizeRead = commentBufferSize; + + if (lSeek!=0) + if (ZSEEK(s->z_filefunc, s->filestream,lSeek,ZLIB_FILEFUNC_SEEK_CUR)==0) + lSeek=0; + else + err=UNZ_ERRNO; + if ((file_info.size_file_comment>0) && (commentBufferSize>0)) + if (ZREAD(s->z_filefunc, s->filestream,szComment,uSizeRead)!=uSizeRead) + err=UNZ_ERRNO; + lSeek+=file_info.size_file_comment - uSizeRead; + } + else + lSeek+=file_info.size_file_comment; + + if ((err==UNZ_OK) && (pfile_info!=NULL)) + *pfile_info=file_info; + + if ((err==UNZ_OK) && (pfile_info_internal!=NULL)) + *pfile_info_internal=file_info_internal; + + return err; +} + + + +/* + Write info about the current file of the ZipFile in the *pfile_info structure.
+ No preparation of the structure is needed; pfile_info, szFileName, extraField and szComment may each be NULL when that item is not wanted. + return UNZ_OK if there is no problem. +*/ +extern int ZEXPORT unzGetCurrentFileInfo (file, + pfile_info, + szFileName, fileNameBufferSize, + extraField, extraFieldBufferSize, + szComment, commentBufferSize) + unzFile file; + unz_file_info *pfile_info; + char *szFileName; + uLong fileNameBufferSize; + void *extraField; + uLong extraFieldBufferSize; + char *szComment; + uLong commentBufferSize; +{ + /* public wrapper: same call, with no internal info requested (NULL) */ + return unzlocal_GetCurrentFileInfoInternal(file,pfile_info,NULL, + szFileName,fileNameBufferSize, + extraField,extraFieldBufferSize, + szComment,commentBufferSize); +} + +/* + Set the current file of the zipfile to the first file. + return UNZ_OK if there is no problem, UNZ_PARAMERROR if file is NULL +*/ +extern int ZEXPORT unzGoToFirstFile (file) + unzFile file; +{ + int err=UNZ_OK; + unz_s* s; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + s->pos_in_central_dir=s->offset_central_dir; + s->num_file=0; + err=unzlocal_GetCurrentFileInfoInternal(file,&s->cur_file_info, + &s->cur_file_info_internal, + NULL,0,NULL,0,NULL,0); + s->current_file_ok = (err == UNZ_OK); /* records whether the entry info could be loaded */ + return err; +} + +/* + Set the current file of the zipfile to the next file. + return UNZ_OK if there is no problem + return UNZ_END_OF_LIST_OF_FILE if the current file was the last one.
+*/ +extern int ZEXPORT unzGoToNextFile (file) + unzFile file; +{ + unz_s* s; + int err; + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + if (!s->current_file_ok) + return UNZ_END_OF_LIST_OF_FILE; + if (s->gi.number_entry != 0xffff) /* 2^16 files overflow hack: a count of 0xffff is not used as the end marker */ + if (s->num_file+1==s->gi.number_entry) + return UNZ_END_OF_LIST_OF_FILE; + + s->pos_in_central_dir += SIZECENTRALDIRITEM + s->cur_file_info.size_filename + + s->cur_file_info.size_file_extra + s->cur_file_info.size_file_comment ; + s->num_file++; + err = unzlocal_GetCurrentFileInfoInternal(file,&s->cur_file_info, + &s->cur_file_info_internal, + NULL,0,NULL,0,NULL,0); + s->current_file_ok = (err == UNZ_OK); + return err; +} + + +/* + Try to locate the file szFileName in the zipfile. + For the meaning of iCaseSensitivity, see unzStringFileNameCompare + + return value : + UNZ_OK if the file is found. It becomes the current file. + UNZ_END_OF_LIST_OF_FILE if the file is not found +*/ +extern int ZEXPORT unzLocateFile (file, szFileName, iCaseSensitivity) + unzFile file; + const char *szFileName; + int iCaseSensitivity; +{ + unz_s* s; + int err; + + /* We remember the 'current' position in the file so that we can jump + * back there if we fail.
+ */ + unz_file_info cur_file_infoSaved; + unz_file_info_internal cur_file_info_internalSaved; + uLong num_fileSaved; + uLong pos_in_central_dirSaved; + + + if (file==NULL) + return UNZ_PARAMERROR; + + /* names that cannot fit in the compare buffer below are rejected */ + if (strlen(szFileName)>=UNZ_MAXFILENAMEINZIP) + return UNZ_PARAMERROR; + + s=(unz_s*)file; + if (!s->current_file_ok) + return UNZ_END_OF_LIST_OF_FILE; + + /* Save the current state */ + num_fileSaved = s->num_file; + pos_in_central_dirSaved = s->pos_in_central_dir; + cur_file_infoSaved = s->cur_file_info; + cur_file_info_internalSaved = s->cur_file_info_internal; + + err = unzGoToFirstFile(file); + + /* linear scan over the entries, starting from the first one */ + while (err == UNZ_OK) + { + char szCurrentFileName[UNZ_MAXFILENAMEINZIP+1]; + err = unzGetCurrentFileInfo(file,NULL, + szCurrentFileName,sizeof(szCurrentFileName)-1, + NULL,0,NULL,0); + if (err == UNZ_OK) + { + if (unzStringFileNameCompare(szCurrentFileName, + szFileName,iCaseSensitivity)==0) + return UNZ_OK; + err = unzGoToNextFile(file); + } + } + + /* We failed, so restore the state of the 'current file' to where we + * were. + */ + s->num_file = num_fileSaved ; + s->pos_in_central_dir = pos_in_central_dirSaved ; + s->cur_file_info = cur_file_infoSaved; + s->cur_file_info_internal = cur_file_info_internalSaved; + return err; +} + + +/* +/////////////////////////////////////////// +// Contributed by Ryan Haksi (mailto://cryogen at infoserve.net) +// I need random access +// +// Further optimization could be realized by adding an ability +// to cache the directory in memory. The goal being a single +// comprehensive file read to put the file I need in memory.
+*/ + +/* +typedef struct unz_file_pos_s +{ + uLong pos_in_zip_directory; // offset in file + uLong num_of_file; // # of file +} unz_file_pos; +*/ + +/* Store the central-directory position and index of the current file in *file_pos. + return UNZ_PARAMERROR if an argument is NULL, UNZ_END_OF_LIST_OF_FILE if there is no usable current file */ +extern int ZEXPORT unzGetFilePos(file, file_pos) + unzFile file; + unz_file_pos* file_pos; +{ + unz_s* s; + + if (file==NULL || file_pos==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + if (!s->current_file_ok) + return UNZ_END_OF_LIST_OF_FILE; + + file_pos->pos_in_zip_directory = s->pos_in_central_dir; + file_pos->num_of_file = s->num_file; + + return UNZ_OK; +} + +/* Make the entry recorded in *file_pos the current file and reload its info. + return UNZ_PARAMERROR if an argument is NULL, otherwise the result of reading the entry */ +extern int ZEXPORT unzGoToFilePos(file, file_pos) + unzFile file; + unz_file_pos* file_pos; +{ + unz_s* s; + int err; + + if (file==NULL || file_pos==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + + /* jump to the right spot */ + s->pos_in_central_dir = file_pos->pos_in_zip_directory; + s->num_file = file_pos->num_of_file; + + /* set the current file */ + err = unzlocal_GetCurrentFileInfoInternal(file,&s->cur_file_info, + &s->cur_file_info_internal, + NULL,0,NULL,0,NULL,0); + /* return results */ + s->current_file_ok = (err == UNZ_OK); + return err; +} + +/* +// Unzip Helper Functions - should be here?
+/////////////////////////////////////////// +*/ + +/* + Read the local header of the current file of the zipfile + Check the coherency of the local header against the info stored for + this file in s->cur_file_info + store in *piSizeVar the combined size of the variable part of the local header + (filename and extra field data) +*/ +local int unzlocal_CheckCurrentFileCoherencyHeader (s,piSizeVar, + poffset_local_extrafield, + psize_local_extrafield) + unz_s* s; + uInt* piSizeVar; + uLong *poffset_local_extrafield; + uInt *psize_local_extrafield; +{ + uLong uMagic,uData,uFlags; + uLong size_filename; + uLong size_extra_field; + int err=UNZ_OK; + + *piSizeVar = 0; + *poffset_local_extrafield = 0; + *psize_local_extrafield = 0; + + if (ZSEEK(s->z_filefunc, s->filestream,s->cur_file_info_internal.offset_curfile + + s->byte_before_the_zipfile,ZLIB_FILEFUNC_SEEK_SET)!=0) + return UNZ_ERRNO; + + + if (err==UNZ_OK) + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uMagic) != UNZ_OK) + err=UNZ_ERRNO; + else if (uMagic!=0x04034b50) + err=UNZ_BADZIPFILE; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) + err=UNZ_ERRNO; +/* + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.wVersion)) + err=UNZ_BADZIPFILE; +*/ + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&uFlags) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.compression_method)) + err=UNZ_BADZIPFILE; + + if ((err==UNZ_OK) && (s->cur_file_info.compression_method!=0) && + (s->cur_file_info.compression_method!=Z_DEFLATED)) + err=UNZ_BADZIPFILE; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* date/time */ + err=UNZ_ERRNO; + + /* the crc and size mismatches below are ignored when flag bit 3 (0x8) is set */ + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* crc */ + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.crc) && + ((uFlags & 8)==0)) + err=UNZ_BADZIPFILE; + + if
(unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* compressed size */ + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.compressed_size) && + ((uFlags & 8)==0)) + err=UNZ_BADZIPFILE; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* uncompressed size */ + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.uncompressed_size) && + ((uFlags & 8)==0)) + err=UNZ_BADZIPFILE; + + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&size_filename) != UNZ_OK) + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (size_filename!=s->cur_file_info.size_filename)) + err=UNZ_BADZIPFILE; + + *piSizeVar += (uInt)size_filename; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&size_extra_field) != UNZ_OK) + err=UNZ_ERRNO; + *poffset_local_extrafield= s->cur_file_info_internal.offset_curfile + + SIZEZIPLOCALHEADER + size_filename; + *psize_local_extrafield = (uInt)size_extra_field; + + *piSizeVar += (uInt)size_extra_field; + + return err; +} + +/* + Open for reading data the current file in the zipfile. + If there is no error and the file is opened, the return value is UNZ_OK.
+*/ +extern int ZEXPORT unzOpenCurrentFile3 (file, method, level, raw, password) + unzFile file; + int* method; + int* level; + int raw; + const char* password; +{ + int err=UNZ_OK; + uInt iSizeVar; + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + uLong offset_local_extrafield; /* offset of the local extra field */ + uInt size_local_extrafield; /* size of the local extra field */ +# ifndef NOUNCRYPT + char source[12]; +# else + if (password != NULL) + return UNZ_PARAMERROR; +# endif + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + if (!s->current_file_ok) + return UNZ_PARAMERROR; + + if (s->pfile_in_zip_read != NULL) + unzCloseCurrentFile(file); + + if (unzlocal_CheckCurrentFileCoherencyHeader(s,&iSizeVar, + &offset_local_extrafield,&size_local_extrafield)!=UNZ_OK) + return UNZ_BADZIPFILE; + + pfile_in_zip_read_info = (file_in_zip_read_info_s*) + ALLOC(sizeof(file_in_zip_read_info_s)); + if (pfile_in_zip_read_info==NULL) + return UNZ_INTERNALERROR; + + pfile_in_zip_read_info->read_buffer=(char*)ALLOC(UNZ_BUFSIZE); + pfile_in_zip_read_info->offset_local_extrafield = offset_local_extrafield; + pfile_in_zip_read_info->size_local_extrafield = size_local_extrafield; + pfile_in_zip_read_info->pos_local_extrafield=0; + pfile_in_zip_read_info->raw=raw; + + if (pfile_in_zip_read_info->read_buffer==NULL) + { + TRYFREE(pfile_in_zip_read_info); + return UNZ_INTERNALERROR; + } + + pfile_in_zip_read_info->stream_initialised=0; + + if (method!=NULL) + *method = (int)s->cur_file_info.compression_method; + + if (level!=NULL) + { + *level = 6; + switch (s->cur_file_info.flag & 0x06) + { + case 6 : *level = 1; break; + case 4 : *level = 2; break; + case 2 : *level = 9; break; + } + } + + if ((s->cur_file_info.compression_method!=0) && + (s->cur_file_info.compression_method!=Z_DEFLATED)) + err=UNZ_BADZIPFILE; + + pfile_in_zip_read_info->crc32_wait=s->cur_file_info.crc; + pfile_in_zip_read_info->crc32=0; + pfile_in_zip_read_info->compression_method = +
s->cur_file_info.compression_method; + pfile_in_zip_read_info->filestream=s->filestream; + pfile_in_zip_read_info->z_filefunc=s->z_filefunc; + pfile_in_zip_read_info->byte_before_the_zipfile=s->byte_before_the_zipfile; + + pfile_in_zip_read_info->stream.total_out = 0; + + if ((s->cur_file_info.compression_method==Z_DEFLATED) && + (!raw)) + { + pfile_in_zip_read_info->stream.zalloc = (alloc_func)0; + pfile_in_zip_read_info->stream.zfree = (free_func)0; + pfile_in_zip_read_info->stream.opaque = (voidpf)0; + pfile_in_zip_read_info->stream.next_in = (voidpf)0; + pfile_in_zip_read_info->stream.avail_in = 0; + + err=inflateInit2(&pfile_in_zip_read_info->stream, -MAX_WBITS); + if (err == Z_OK) + pfile_in_zip_read_info->stream_initialised=1; + else + { + /* release the read buffer too, not only the struct that points to it */ + TRYFREE(pfile_in_zip_read_info->read_buffer); + TRYFREE(pfile_in_zip_read_info); + return err; + } + /* windowBits is passed < 0 to tell that there is no zlib header. + * Note that in this case inflate *requires* an extra "dummy" byte + * after the compressed stream in order to complete decompression and + * return Z_STREAM_END.
+ * In unzip, I don't strictly wait for Z_STREAM_END because I know the + * size of both compressed and uncompressed data + */ + } + pfile_in_zip_read_info->rest_read_compressed = + s->cur_file_info.compressed_size ; + pfile_in_zip_read_info->rest_read_uncompressed = + s->cur_file_info.uncompressed_size ; + + + pfile_in_zip_read_info->pos_in_zipfile = + s->cur_file_info_internal.offset_curfile + SIZEZIPLOCALHEADER + + iSizeVar; + + pfile_in_zip_read_info->stream.avail_in = (uInt)0; + + s->pfile_in_zip_read = pfile_in_zip_read_info; + +# ifndef NOUNCRYPT + if (password != NULL) + { + int i; + s->pcrc_32_tab = get_crc_table(); + init_keys(password,s->keys,s->pcrc_32_tab); + if (ZSEEK(s->z_filefunc, s->filestream, + s->pfile_in_zip_read->pos_in_zipfile + + s->pfile_in_zip_read->byte_before_the_zipfile, + SEEK_SET)!=0) + return UNZ_INTERNALERROR; + if(ZREAD(s->z_filefunc, s->filestream,source, 12)<12) + return UNZ_INTERNALERROR; + + /* run the 12 bytes read above through zdecode, then skip them */ + for (i = 0; i<12; i++) + zdecode(s->keys,s->pcrc_32_tab,source[i]); + + s->pfile_in_zip_read->pos_in_zipfile+=12; + s->encrypted=1; + } +# endif + + + return UNZ_OK; +} + +/* unzOpenCurrentFile3 with no method/level report, no raw mode and no password */ +extern int ZEXPORT unzOpenCurrentFile (file) + unzFile file; +{ + return unzOpenCurrentFile3(file, NULL, NULL, 0, NULL); +} + +/* unzOpenCurrentFile3 with a password, no method/level report and no raw mode */ +extern int ZEXPORT unzOpenCurrentFilePassword (file, password) + unzFile file; + const char* password; +{ + return unzOpenCurrentFile3(file, NULL, NULL, 0, password); +} + +/* unzOpenCurrentFile3 without a password */ +extern int ZEXPORT unzOpenCurrentFile2 (file,method,level,raw) + unzFile file; + int* method; + int* level; + int raw; +{ + return unzOpenCurrentFile3(file, method, level, raw, NULL); +} + +/* + Read bytes from the current file. + buf contains the buffer where data must be copied + len the size of buf.
+ + return the number of byte copied if somes bytes are copied + return 0 if the end of file was reached + return <0 with error code if there is an error + (UNZ_ERRNO for IO error, or zLib error for uncompress error) +*/ +extern int ZEXPORT unzReadCurrentFile (file, buf, len) + unzFile file; + voidp buf; + unsigned len; +{ + int err=UNZ_OK; + uInt iRead = 0; + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + pfile_in_zip_read_info=s->pfile_in_zip_read; + + if (pfile_in_zip_read_info==NULL) + return UNZ_PARAMERROR; + + + if ((pfile_in_zip_read_info->read_buffer == NULL)) + return UNZ_END_OF_LIST_OF_FILE; + if (len==0) + return 0; + + pfile_in_zip_read_info->stream.next_out = (Bytef*)buf; + + pfile_in_zip_read_info->stream.avail_out = (uInt)len; + + if ((len>pfile_in_zip_read_info->rest_read_uncompressed) && + (!(pfile_in_zip_read_info->raw))) + pfile_in_zip_read_info->stream.avail_out = + (uInt)pfile_in_zip_read_info->rest_read_uncompressed; + + if ((len>pfile_in_zip_read_info->rest_read_compressed+ + pfile_in_zip_read_info->stream.avail_in) && + (pfile_in_zip_read_info->raw)) + pfile_in_zip_read_info->stream.avail_out = + (uInt)pfile_in_zip_read_info->rest_read_compressed+ + pfile_in_zip_read_info->stream.avail_in; + + while (pfile_in_zip_read_info->stream.avail_out>0) + { + if ((pfile_in_zip_read_info->stream.avail_in==0) && + (pfile_in_zip_read_info->rest_read_compressed>0)) + { + uInt uReadThis = UNZ_BUFSIZE; + if (pfile_in_zip_read_info->rest_read_compressedrest_read_compressed; + if (uReadThis == 0) + return UNZ_EOF; + if (ZSEEK(pfile_in_zip_read_info->z_filefunc, + pfile_in_zip_read_info->filestream, + pfile_in_zip_read_info->pos_in_zipfile + + pfile_in_zip_read_info->byte_before_the_zipfile, + ZLIB_FILEFUNC_SEEK_SET)!=0) + return UNZ_ERRNO; + if (ZREAD(pfile_in_zip_read_info->z_filefunc, + pfile_in_zip_read_info->filestream, + pfile_in_zip_read_info->read_buffer, + 
uReadThis)!=uReadThis) + return UNZ_ERRNO; + + +# ifndef NOUNCRYPT + if(s->encrypted) + { + uInt i; + for(i=0;iread_buffer[i] = + zdecode(s->keys,s->pcrc_32_tab, + pfile_in_zip_read_info->read_buffer[i]); + } +# endif + + + pfile_in_zip_read_info->pos_in_zipfile += uReadThis; + + pfile_in_zip_read_info->rest_read_compressed-=uReadThis; + + pfile_in_zip_read_info->stream.next_in = + (Bytef*)pfile_in_zip_read_info->read_buffer; + pfile_in_zip_read_info->stream.avail_in = (uInt)uReadThis; + } + + if ((pfile_in_zip_read_info->compression_method==0) || (pfile_in_zip_read_info->raw)) + { + uInt uDoCopy,i ; + + if ((pfile_in_zip_read_info->stream.avail_in == 0) && + (pfile_in_zip_read_info->rest_read_compressed == 0)) + return (iRead==0) ? UNZ_EOF : iRead; + + if (pfile_in_zip_read_info->stream.avail_out < + pfile_in_zip_read_info->stream.avail_in) + uDoCopy = pfile_in_zip_read_info->stream.avail_out ; + else + uDoCopy = pfile_in_zip_read_info->stream.avail_in ; + + for (i=0;istream.next_out+i) = + *(pfile_in_zip_read_info->stream.next_in+i); + + pfile_in_zip_read_info->crc32 = crc32(pfile_in_zip_read_info->crc32, + pfile_in_zip_read_info->stream.next_out, + uDoCopy); + pfile_in_zip_read_info->rest_read_uncompressed-=uDoCopy; + pfile_in_zip_read_info->stream.avail_in -= uDoCopy; + pfile_in_zip_read_info->stream.avail_out -= uDoCopy; + pfile_in_zip_read_info->stream.next_out += uDoCopy; + pfile_in_zip_read_info->stream.next_in += uDoCopy; + pfile_in_zip_read_info->stream.total_out += uDoCopy; + iRead += uDoCopy; + } + else + { + uLong uTotalOutBefore,uTotalOutAfter; + const Bytef *bufBefore; + uLong uOutThis; + int flush=Z_SYNC_FLUSH; + + uTotalOutBefore = pfile_in_zip_read_info->stream.total_out; + bufBefore = pfile_in_zip_read_info->stream.next_out; + + /* + if ((pfile_in_zip_read_info->rest_read_uncompressed == + pfile_in_zip_read_info->stream.avail_out) && + (pfile_in_zip_read_info->rest_read_compressed == 0)) + flush = Z_FINISH; + */ + 
err=inflate(&pfile_in_zip_read_info->stream,flush); + + if ((err>=0) && (pfile_in_zip_read_info->stream.msg!=NULL)) + err = Z_DATA_ERROR; + + uTotalOutAfter = pfile_in_zip_read_info->stream.total_out; + uOutThis = uTotalOutAfter-uTotalOutBefore; + + pfile_in_zip_read_info->crc32 = + crc32(pfile_in_zip_read_info->crc32,bufBefore, + (uInt)(uOutThis)); + + pfile_in_zip_read_info->rest_read_uncompressed -= + uOutThis; + + iRead += (uInt)(uTotalOutAfter - uTotalOutBefore); + + if (err==Z_STREAM_END) + return (iRead==0) ? UNZ_EOF : iRead; + if (err!=Z_OK) + break; + } + } + + if (err==Z_OK) + return iRead; + return err; +} + + +/* + Give the current position in uncompressed data +*/ +extern z_off_t ZEXPORT unztell (file) + unzFile file; +{ + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + pfile_in_zip_read_info=s->pfile_in_zip_read; + + if (pfile_in_zip_read_info==NULL) + return UNZ_PARAMERROR; + + return (z_off_t)pfile_in_zip_read_info->stream.total_out; +} + + +/* + return 1 if the end of file was reached, 0 elsewhere +*/ +extern int ZEXPORT unzeof (file) + unzFile file; +{ + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + pfile_in_zip_read_info=s->pfile_in_zip_read; + + if (pfile_in_zip_read_info==NULL) + return UNZ_PARAMERROR; + + if (pfile_in_zip_read_info->rest_read_uncompressed == 0) + return 1; + else + return 0; +} + + + +/* + Read extra field from the current file (opened by unzOpenCurrentFile) + This is the local-header version of the extra field (sometimes, there is + more info in the local-header version than in the central-header) + + if buf==NULL, it return the size of the local extra field that can be read + + if buf!=NULL, len is the size of the buffer, the extra header is copied in + buf. 
+ the return value is the number of bytes copied in buf, or (if <0) + the error code +*/ +extern int ZEXPORT unzGetLocalExtrafield (file,buf,len) + unzFile file; + voidp buf; + unsigned len; +{ + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + uInt read_now; + uLong size_to_read; + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + pfile_in_zip_read_info=s->pfile_in_zip_read; + + if (pfile_in_zip_read_info==NULL) + return UNZ_PARAMERROR; + + size_to_read = (pfile_in_zip_read_info->size_local_extrafield - + pfile_in_zip_read_info->pos_local_extrafield); + + if (buf==NULL) + return (int)size_to_read; + + if (len>size_to_read) + read_now = (uInt)size_to_read; + else + read_now = (uInt)len ; + + if (read_now==0) + return 0; + + if (ZSEEK(pfile_in_zip_read_info->z_filefunc, + pfile_in_zip_read_info->filestream, + pfile_in_zip_read_info->offset_local_extrafield + + pfile_in_zip_read_info->pos_local_extrafield, + ZLIB_FILEFUNC_SEEK_SET)!=0) + return UNZ_ERRNO; + + if (ZREAD(pfile_in_zip_read_info->z_filefunc, + pfile_in_zip_read_info->filestream, + buf,read_now)!=read_now) + return UNZ_ERRNO; + + return (int)read_now; +} + +/* + Close the file in zip opened with unzipOpenCurrentFile + Return UNZ_CRCERROR if all the file was read but the CRC is not good +*/ +extern int ZEXPORT unzCloseCurrentFile (file) + unzFile file; +{ + int err=UNZ_OK; + + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + pfile_in_zip_read_info=s->pfile_in_zip_read; + + if (pfile_in_zip_read_info==NULL) + return UNZ_PARAMERROR; + + + if ((pfile_in_zip_read_info->rest_read_uncompressed == 0) && + (!pfile_in_zip_read_info->raw)) + { + if (pfile_in_zip_read_info->crc32 != pfile_in_zip_read_info->crc32_wait) + err=UNZ_CRCERROR; + } + + + TRYFREE(pfile_in_zip_read_info->read_buffer); + pfile_in_zip_read_info->read_buffer = NULL; + if (pfile_in_zip_read_info->stream_initialised) + 
inflateEnd(&pfile_in_zip_read_info->stream); + + pfile_in_zip_read_info->stream_initialised = 0; + TRYFREE(pfile_in_zip_read_info); + + s->pfile_in_zip_read=NULL; + + return err; +} + + +/* + Get the global comment string of the ZipFile, in the szComment buffer. + uSizeBuf is the size of the szComment buffer. + return the number of byte copied or an error code <0 +*/ +extern int ZEXPORT unzGetGlobalComment (file, szComment, uSizeBuf) + unzFile file; + char *szComment; + uLong uSizeBuf; +{ + int err=UNZ_OK; + unz_s* s; + uLong uReadThis ; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + + uReadThis = uSizeBuf; + if (uReadThis>s->gi.size_comment) + uReadThis = s->gi.size_comment; + + if (ZSEEK(s->z_filefunc,s->filestream,s->central_pos+22,ZLIB_FILEFUNC_SEEK_SET)!=0) + return UNZ_ERRNO; + + if (uReadThis>0) + { + *szComment='\0'; + if (ZREAD(s->z_filefunc,s->filestream,szComment,uReadThis)!=uReadThis) + return UNZ_ERRNO; + } + + if ((szComment != NULL) && (uSizeBuf > s->gi.size_comment)) + *(szComment+s->gi.size_comment)='\0'; + return (int)uReadThis; +} + +/* Additions by RX '2004 */ +extern uLong ZEXPORT unzGetOffset (file) + unzFile file; +{ + unz_s* s; + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + if (!s->current_file_ok) + return 0; + if (s->gi.number_entry != 0 && s->gi.number_entry != 0xffff) + if (s->num_file==s->gi.number_entry) + return 0; + return s->pos_in_central_dir; +} + +extern int ZEXPORT unzSetOffset (file, pos) + unzFile file; + uLong pos; +{ + unz_s* s; + int err; + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + + s->pos_in_central_dir = pos; + s->num_file = s->gi.number_entry; /* hack */ + err = unzlocal_GetCurrentFileInfoInternal(file,&s->cur_file_info, + &s->cur_file_info_internal, + NULL,0,NULL,0,NULL,0); + s->current_file_ok = (err == UNZ_OK); + return err; +} Added: external/zlib/contrib/minizip/unzip.h ============================================================================== --- (empty 
file) +++ external/zlib/contrib/minizip/unzip.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,354 @@ +/* unzip.h -- IO for uncompress .zip files using zlib + Version 1.01e, February 12th, 2005 + + Copyright (C) 1998-2005 Gilles Vollant + + This unzip package allow extract file from .ZIP file, compatible with PKZip 2.04g + WinZip, InfoZip tools and compatible. + + Multi volume ZipFile (span) are not supported. + Encryption compatible with pkzip 2.04g only supported + Old compressions used by old PKZip 1.x are not supported + + + I WAIT FEEDBACK at mail info at winimage.com + Visit also http://www.winimage.com/zLibDll/unzip.htm for evolution + + Condition of use and distribution are the same than zlib : + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + +*/ + +/* for more info about .ZIP format, see + http://www.info-zip.org/pub/infozip/doc/appnote-981119-iz.zip + http://www.info-zip.org/pub/infozip/doc/ + PkWare has also a specification at : + ftp://ftp.pkware.com/probdesc.zip +*/ + +#ifndef _unz_H +#define _unz_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ZLIB_H +#include "zlib.h" +#endif + +#ifndef _ZLIBIOAPI_H +#include "ioapi.h" +#endif + +#if defined(STRICTUNZIP) || defined(STRICTZIPUNZIP) +/* like the STRICT of WIN32, we define a pointer that cannot be converted + from (void*) without cast */ +typedef struct TagunzFile__ { int unused; } unzFile__; +typedef unzFile__ *unzFile; +#else +typedef voidp unzFile; +#endif + + +#define UNZ_OK (0) +#define UNZ_END_OF_LIST_OF_FILE (-100) +#define UNZ_ERRNO (Z_ERRNO) +#define UNZ_EOF (0) +#define UNZ_PARAMERROR (-102) +#define UNZ_BADZIPFILE (-103) +#define UNZ_INTERNALERROR (-104) +#define UNZ_CRCERROR (-105) + +/* tm_unz contain date/time info */ +typedef struct tm_unz_s +{ + uInt tm_sec; /* seconds after the minute - [0,59] */ + uInt tm_min; /* minutes after the hour - [0,59] */ + uInt tm_hour; /* hours since midnight - [0,23] */ + uInt tm_mday; /* day of the month - [1,31] */ + uInt tm_mon; /* months since January - [0,11] */ + uInt tm_year; /* years - [1980..2044] */ +} tm_unz; + +/* unz_global_info structure contain global data about the ZIPfile + These data comes from the end of central dir */ +typedef struct unz_global_info_s +{ + uLong number_entry; /* total number of entries in + the central dir on this disk */ + uLong size_comment; /* size of the global comment of the zipfile */ +} unz_global_info; + + +/* unz_file_info contain information about a file in the zipfile */ +typedef struct unz_file_info_s +{ + uLong version; /* version made by 2 bytes */ + uLong version_needed; /* version needed to extract 2 bytes */ + uLong flag; /* general purpose bit flag 2 bytes */ + uLong compression_method; /* compression method 2 bytes */ + uLong dosDate; 
/* last mod file date in Dos fmt 4 bytes */ + uLong crc; /* crc-32 4 bytes */ + uLong compressed_size; /* compressed size 4 bytes */ + uLong uncompressed_size; /* uncompressed size 4 bytes */ + uLong size_filename; /* filename length 2 bytes */ + uLong size_file_extra; /* extra field length 2 bytes */ + uLong size_file_comment; /* file comment length 2 bytes */ + + uLong disk_num_start; /* disk number start 2 bytes */ + uLong internal_fa; /* internal file attributes 2 bytes */ + uLong external_fa; /* external file attributes 4 bytes */ + + tm_unz tmu_date; +} unz_file_info; + +extern int ZEXPORT unzStringFileNameCompare OF ((const char* fileName1, + const char* fileName2, + int iCaseSensitivity)); +/* + Compare two filename (fileName1,fileName2). + If iCaseSenisivity = 1, comparision is case sensitivity (like strcmp) + If iCaseSenisivity = 2, comparision is not case sensitivity (like strcmpi + or strcasecmp) + If iCaseSenisivity = 0, case sensitivity is defaut of your operating system + (like 1 on Unix, 2 on Windows) +*/ + + +extern unzFile ZEXPORT unzOpen OF((const char *path)); +/* + Open a Zip file. path contain the full pathname (by example, + on a Windows XP computer "c:\\zlib\\zlib113.zip" or on an Unix computer + "zlib/zlib113.zip". + If the zipfile cannot be opened (file don't exist or in not valid), the + return value is NULL. + Else, the return value is a unzFile Handle, usable with other function + of this unzip package. +*/ + +extern unzFile ZEXPORT unzOpen2 OF((const char *path, + zlib_filefunc_def* pzlib_filefunc_def)); +/* + Open a Zip file, like unzOpen, but provide a set of file low level API + for read/write the zip file (see ioapi.h) +*/ + +extern int ZEXPORT unzClose OF((unzFile file)); +/* + Close a ZipFile opened with unzipOpen. + If there is files inside the .Zip opened with unzOpenCurrentFile (see later), + these files MUST be closed with unzipCloseCurrentFile before call unzipClose. + return UNZ_OK if there is no problem. 
*/ + +extern int ZEXPORT unzGetGlobalInfo OF((unzFile file, + unz_global_info *pglobal_info)); +/* + Write info about the ZipFile in the *pglobal_info structure. + No preparation of the structure is needed + return UNZ_OK if there is no problem. */ + + +extern int ZEXPORT unzGetGlobalComment OF((unzFile file, + char *szComment, + uLong uSizeBuf)); +/* + Get the global comment string of the ZipFile, in the szComment buffer. + uSizeBuf is the size of the szComment buffer. + return the number of byte copied or an error code <0 +*/ + + +/***************************************************************************/ +/* Unzip package allow you browse the directory of the zipfile */ + +extern int ZEXPORT unzGoToFirstFile OF((unzFile file)); +/* + Set the current file of the zipfile to the first file. + return UNZ_OK if there is no problem +*/ + +extern int ZEXPORT unzGoToNextFile OF((unzFile file)); +/* + Set the current file of the zipfile to the next file. + return UNZ_OK if there is no problem + return UNZ_END_OF_LIST_OF_FILE if the actual file was the latest. +*/ + +extern int ZEXPORT unzLocateFile OF((unzFile file, + const char *szFileName, + int iCaseSensitivity)); +/* + Try locate the file szFileName in the zipfile. + For the iCaseSensitivity signification, see unzStringFileNameCompare + + return value : + UNZ_OK if the file is found. It becomes the current file. 
+ UNZ_END_OF_LIST_OF_FILE if the file is not found +*/ + + +/* ****************************************** */ +/* Ryan supplied functions */ +/* unz_file_info contain information about a file in the zipfile */ +typedef struct unz_file_pos_s +{ + uLong pos_in_zip_directory; /* offset in zip file directory */ + uLong num_of_file; /* # of file */ +} unz_file_pos; + +extern int ZEXPORT unzGetFilePos( + unzFile file, + unz_file_pos* file_pos); + +extern int ZEXPORT unzGoToFilePos( + unzFile file, + unz_file_pos* file_pos); + +/* ****************************************** */ + +extern int ZEXPORT unzGetCurrentFileInfo OF((unzFile file, + unz_file_info *pfile_info, + char *szFileName, + uLong fileNameBufferSize, + void *extraField, + uLong extraFieldBufferSize, + char *szComment, + uLong commentBufferSize)); +/* + Get Info about the current file + if pfile_info!=NULL, the *pfile_info structure will contain somes info about + the current file + if szFileName!=NULL, the filemane string will be copied in szFileName + (fileNameBufferSize is the size of the buffer) + if extraField!=NULL, the extra field information will be copied in extraField + (extraFieldBufferSize is the size of the buffer). + This is the Central-header version of the extra field + if szComment!=NULL, the comment string of the file will be copied in szComment + (commentBufferSize is the size of the buffer) +*/ + +/***************************************************************************/ +/* for reading the content of the current zipfile, you can open it, read data + from it, and close it (you can close it before reading all the file) + */ + +extern int ZEXPORT unzOpenCurrentFile OF((unzFile file)); +/* + Open for reading data the current file in the zipfile. + If there is no error, the return value is UNZ_OK. +*/ + +extern int ZEXPORT unzOpenCurrentFilePassword OF((unzFile file, + const char* password)); +/* + Open for reading data the current file in the zipfile. 
+ password is a crypting password + If there is no error, the return value is UNZ_OK. +*/ + +extern int ZEXPORT unzOpenCurrentFile2 OF((unzFile file, + int* method, + int* level, + int raw)); +/* + Same than unzOpenCurrentFile, but open for read raw the file (not uncompress) + if raw==1 + *method will receive method of compression, *level will receive level of + compression + note : you can set level parameter as NULL (if you did not want known level, + but you CANNOT set method parameter as NULL +*/ + +extern int ZEXPORT unzOpenCurrentFile3 OF((unzFile file, + int* method, + int* level, + int raw, + const char* password)); +/* + Same than unzOpenCurrentFile, but open for read raw the file (not uncompress) + if raw==1 + *method will receive method of compression, *level will receive level of + compression + note : you can set level parameter as NULL (if you did not want known level, + but you CANNOT set method parameter as NULL +*/ + + +extern int ZEXPORT unzCloseCurrentFile OF((unzFile file)); +/* + Close the file in zip opened with unzOpenCurrentFile + Return UNZ_CRCERROR if all the file was read but the CRC is not good +*/ + +extern int ZEXPORT unzReadCurrentFile OF((unzFile file, + voidp buf, + unsigned len)); +/* + Read bytes from the current file (opened by unzOpenCurrentFile) + buf contain buffer where data must be copied + len the size of buf. 
+ + return the number of byte copied if somes bytes are copied + return 0 if the end of file was reached + return <0 with error code if there is an error + (UNZ_ERRNO for IO error, or zLib error for uncompress error) +*/ + +extern z_off_t ZEXPORT unztell OF((unzFile file)); +/* + Give the current position in uncompressed data +*/ + +extern int ZEXPORT unzeof OF((unzFile file)); +/* + return 1 if the end of file was reached, 0 elsewhere +*/ + +extern int ZEXPORT unzGetLocalExtrafield OF((unzFile file, + voidp buf, + unsigned len)); +/* + Read extra field from the current file (opened by unzOpenCurrentFile) + This is the local-header version of the extra field (sometimes, there is + more info in the local-header version than in the central-header) + + if buf==NULL, it return the size of the local extra field + + if buf!=NULL, len is the size of the buffer, the extra header is copied in + buf. + the return value is the number of bytes copied in buf, or (if <0) + the error code +*/ + +/***************************************************************************/ + +/* Get the current file offset */ +extern uLong ZEXPORT unzGetOffset (unzFile file); + +/* Set the current file offset */ +extern int ZEXPORT unzSetOffset (unzFile file, uLong pos); + + + +#ifdef __cplusplus +} +#endif + +#endif /* _unz_H */ Added: external/zlib/contrib/minizip/zip.c ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/zip.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,1219 @@ +/* zip.c -- IO on .zip files using zlib + Version 1.01e, February 12th, 2005 + + 27 Dec 2004 Rolf Kalbermatter + Modification to zipOpen2 to support globalComment retrieval. 
+ + Copyright (C) 1998-2005 Gilles Vollant + + Read zip.h for more info +*/ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "zlib.h" +#include "zip.h" + +#ifdef STDC +# include <stddef.h> +# include <string.h> +# include <stdlib.h> +#endif +#ifdef NO_ERRNO_H + extern int errno; +#else +# include <errno.h> +#endif + + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + +#ifndef VERSIONMADEBY +# define VERSIONMADEBY (0x0) /* platform depedent */ +#endif + +#ifndef Z_BUFSIZE +#define Z_BUFSIZE (16384) +#endif + +#ifndef Z_MAXFILENAMEINZIP +#define Z_MAXFILENAMEINZIP (256) +#endif + +#ifndef ALLOC +# define ALLOC(size) (malloc(size)) +#endif +#ifndef TRYFREE +# define TRYFREE(p) {if (p) free(p);} +#endif + +/* +#define SIZECENTRALDIRITEM (0x2e) +#define SIZEZIPLOCALHEADER (0x1e) +*/ + +/* I've found an old Unix (a SunOS 4.1.3_U1) without all SEEK_* defined.... */ + +#ifndef SEEK_CUR +#define SEEK_CUR 1 +#endif + +#ifndef SEEK_END +#define SEEK_END 2 +#endif + +#ifndef SEEK_SET +#define SEEK_SET 0 +#endif + +#ifndef DEF_MEM_LEVEL +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +#endif +const char zip_copyright[] = + " zip 1.01 Copyright 1998-2004 Gilles Vollant - http://www.winimage.com/zLibDll"; + + +#define SIZEDATA_INDATABLOCK (4096-(4*4)) + +#define LOCALHEADERMAGIC (0x04034b50) +#define CENTRALHEADERMAGIC (0x02014b50) +#define ENDHEADERMAGIC (0x06054b50) + +#define FLAG_LOCALHEADER_OFFSET (0x06) +#define CRC_LOCALHEADER_OFFSET (0x0e) + +#define SIZECENTRALHEADER (0x2e) /* 46 */ + +typedef struct linkedlist_datablock_internal_s +{ + struct linkedlist_datablock_internal_s* next_datablock; + uLong avail_in_this_block; + uLong filled_in_this_block; + uLong unused; /* for future use and alignement */ + unsigned char data[SIZEDATA_INDATABLOCK]; +} linkedlist_datablock_internal; + +typedef struct linkedlist_data_s +{ + linkedlist_datablock_internal* first_block; +
linkedlist_datablock_internal* last_block; +} linkedlist_data; + + +typedef struct +{ + z_stream stream; /* zLib stream structure for inflate */ + int stream_initialised; /* 1 is stream is initialised */ + uInt pos_in_buffered_data; /* last written byte in buffered_data */ + + uLong pos_local_header; /* offset of the local header of the file + currenty writing */ + char* central_header; /* central header data for the current file */ + uLong size_centralheader; /* size of the central header for cur file */ + uLong flag; /* flag of the file currently writing */ + + int method; /* compression method of file currenty wr.*/ + int raw; /* 1 for directly writing raw data */ + Byte buffered_data[Z_BUFSIZE];/* buffer contain compressed data to be writ*/ + uLong dosDate; + uLong crc32; + int encrypt; +#ifndef NOCRYPT + unsigned long keys[3]; /* keys defining the pseudo-random sequence */ + const unsigned long* pcrc_32_tab; + int crypt_header_size; +#endif +} curfile_info; + +typedef struct +{ + zlib_filefunc_def z_filefunc; + voidpf filestream; /* io structore of the zipfile */ + linkedlist_data central_dir;/* datablock with central dir in construction*/ + int in_opened_file_inzip; /* 1 if a file in the zip is currently writ.*/ + curfile_info ci; /* info on the file curretly writing */ + + uLong begin_pos; /* position of the beginning of the zipfile */ + uLong add_position_when_writting_offset; + uLong number_entry; +#ifndef NO_ADDFILEINEXISTINGZIP + char *globalcomment; +#endif +} zip_internal; + + + +#ifndef NOCRYPT +#define INCLUDECRYPTINGCODE_IFCRYPTALLOWED +#include "crypt.h" +#endif + +local linkedlist_datablock_internal* allocate_new_datablock() +{ + linkedlist_datablock_internal* ldi; + ldi = (linkedlist_datablock_internal*) + ALLOC(sizeof(linkedlist_datablock_internal)); + if (ldi!=NULL) + { + ldi->next_datablock = NULL ; + ldi->filled_in_this_block = 0 ; + ldi->avail_in_this_block = SIZEDATA_INDATABLOCK ; + } + return ldi; +} + +local void free_datablock(ldi) + 
linkedlist_datablock_internal* ldi; +{ + while (ldi!=NULL) + { + linkedlist_datablock_internal* ldinext = ldi->next_datablock; + TRYFREE(ldi); + ldi = ldinext; + } +} + +local void init_linkedlist(ll) + linkedlist_data* ll; +{ + ll->first_block = ll->last_block = NULL; +} + +local void free_linkedlist(ll) + linkedlist_data* ll; +{ + free_datablock(ll->first_block); + ll->first_block = ll->last_block = NULL; +} + + +local int add_data_in_datablock(ll,buf,len) + linkedlist_data* ll; + const void* buf; + uLong len; +{ + linkedlist_datablock_internal* ldi; + const unsigned char* from_copy; + + if (ll==NULL) + return ZIP_INTERNALERROR; + + if (ll->last_block == NULL) + { + ll->first_block = ll->last_block = allocate_new_datablock(); + if (ll->first_block == NULL) + return ZIP_INTERNALERROR; + } + + ldi = ll->last_block; + from_copy = (unsigned char*)buf; + + while (len>0) + { + uInt copy_this; + uInt i; + unsigned char* to_copy; + + if (ldi->avail_in_this_block==0) + { + ldi->next_datablock = allocate_new_datablock(); + if (ldi->next_datablock == NULL) + return ZIP_INTERNALERROR; + ldi = ldi->next_datablock ; + ll->last_block = ldi; + } + + if (ldi->avail_in_this_block < len) + copy_this = (uInt)ldi->avail_in_this_block; + else + copy_this = (uInt)len; + + to_copy = &(ldi->data[ldi->filled_in_this_block]); + + for (i=0;i<copy_this;i++) + *(to_copy+i)=*(from_copy+i); + + ldi->filled_in_this_block += copy_this; + ldi->avail_in_this_block -= copy_this; + from_copy += copy_this ; + len -= copy_this; + } + return ZIP_OK; +} + + + +/****************************************************************************/ + +#ifndef NO_ADDFILEINEXISTINGZIP +/* =========================================================================== + Inputs a long in LSB order to the given file + nbByte == 1, 2 or 4 (byte, short or long) +*/ + +local int ziplocal_putValue OF((const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, uLong x, int nbByte)); +local int ziplocal_putValue (pzlib_filefunc_def, filestream, x, nbByte) + const
zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + uLong x; + int nbByte; +{ + unsigned char buf[4]; + int n; + for (n = 0; n < nbByte; n++) + { + buf[n] = (unsigned char)(x & 0xff); + x >>= 8; + } + if (x != 0) + { /* data overflow - hack for ZIP64 (X Roche) */ + for (n = 0; n < nbByte; n++) + { + buf[n] = 0xff; + } + } + + if (ZWRITE(*pzlib_filefunc_def,filestream,buf,nbByte)!=(uLong)nbByte) + return ZIP_ERRNO; + else + return ZIP_OK; +} + +local void ziplocal_putValue_inmemory OF((void* dest, uLong x, int nbByte)); +local void ziplocal_putValue_inmemory (dest, x, nbByte) + void* dest; + uLong x; + int nbByte; +{ + unsigned char* buf=(unsigned char*)dest; + int n; + for (n = 0; n < nbByte; n++) { + buf[n] = (unsigned char)(x & 0xff); + x >>= 8; + } + + if (x != 0) + { /* data overflow - hack for ZIP64 */ + for (n = 0; n < nbByte; n++) + { + buf[n] = 0xff; + } + } +} + +/****************************************************************************/ + + +local uLong ziplocal_TmzDateToDosDate(ptm,dosDate) + const tm_zip* ptm; + uLong dosDate; +{ + uLong year = (uLong)ptm->tm_year; + if (year>1980) + year-=1980; + else if (year>80) + year-=80; + return + (uLong) (((ptm->tm_mday) + (32 * (ptm->tm_mon+1)) + (512 * year)) << 16) | + ((ptm->tm_sec/2) + (32* ptm->tm_min) + (2048 * (uLong)ptm->tm_hour)); +} + + +/****************************************************************************/ + +local int ziplocal_getByte OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + int *pi)); + +local int ziplocal_getByte(pzlib_filefunc_def,filestream,pi) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + int *pi; +{ + unsigned char c; + int err = (int)ZREAD(*pzlib_filefunc_def,filestream,&c,1); + if (err==1) + { + *pi = (int)c; + return ZIP_OK; + } + else + { + if (ZERROR(*pzlib_filefunc_def,filestream)) + return ZIP_ERRNO; + else + return ZIP_EOF; + } +} + + +/* 
=========================================================================== + Reads a long in LSB order from the given gz_stream. Sets +*/ +local int ziplocal_getShort OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + uLong *pX)); + +local int ziplocal_getShort (pzlib_filefunc_def,filestream,pX) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + uLong *pX; +{ + uLong x ; + int i; + int err; + + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x = (uLong)i; + + if (err==ZIP_OK) + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<8; + + if (err==ZIP_OK) + *pX = x; + else + *pX = 0; + return err; +} + +local int ziplocal_getLong OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + uLong *pX)); + +local int ziplocal_getLong (pzlib_filefunc_def,filestream,pX) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + uLong *pX; +{ + uLong x ; + int i; + int err; + + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x = (uLong)i; + + if (err==ZIP_OK) + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<8; + + if (err==ZIP_OK) + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<16; + + if (err==ZIP_OK) + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<24; + + if (err==ZIP_OK) + *pX = x; + else + *pX = 0; + return err; +} + +#ifndef BUFREADCOMMENT +#define BUFREADCOMMENT (0x400) +#endif +/* + Locate the Central directory of a zipfile (at the end, just before + the global comment) +*/ +local uLong ziplocal_SearchCentralDir OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream)); + +local uLong ziplocal_SearchCentralDir(pzlib_filefunc_def,filestream) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; +{ + unsigned char* buf; + uLong uSizeFile; + uLong uBackRead; + uLong uMaxBack=0xffff; /* maximum size of global comment */ + uLong 
uPosFound=0; + + if (ZSEEK(*pzlib_filefunc_def,filestream,0,ZLIB_FILEFUNC_SEEK_END) != 0) + return 0; + + + uSizeFile = ZTELL(*pzlib_filefunc_def,filestream); + + if (uMaxBack>uSizeFile) + uMaxBack = uSizeFile; + + buf = (unsigned char*)ALLOC(BUFREADCOMMENT+4); + if (buf==NULL) + return 0; + + uBackRead = 4; + while (uBackRead<uMaxBack) + { + uLong uReadSize,uReadPos ; + int i; + if (uBackRead+BUFREADCOMMENT>uMaxBack) + uBackRead = uMaxBack; + else + uBackRead+=BUFREADCOMMENT; + uReadPos = uSizeFile-uBackRead ; + + uReadSize = ((BUFREADCOMMENT+4) < (uSizeFile-uReadPos)) ? + (BUFREADCOMMENT+4) : (uSizeFile-uReadPos); + if (ZSEEK(*pzlib_filefunc_def,filestream,uReadPos,ZLIB_FILEFUNC_SEEK_SET)!=0) + break; + + if (ZREAD(*pzlib_filefunc_def,filestream,buf,uReadSize)!=uReadSize) + break; + + for (i=(int)uReadSize-3; (i--)>0;) + if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) && + ((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06)) + { + uPosFound = uReadPos+i; + break; + } + + if (uPosFound!=0) + break; + } + TRYFREE(buf); + return uPosFound; +} +#endif /* !NO_ADDFILEINEXISTINGZIP*/ + +/************************************************************/ +extern zipFile ZEXPORT zipOpen2 (pathname, append, globalcomment, pzlib_filefunc_def) + const char *pathname; + int append; + zipcharpc* globalcomment; + zlib_filefunc_def* pzlib_filefunc_def; +{ + zip_internal ziinit; + zip_internal* zi; + int err=ZIP_OK; + + + if (pzlib_filefunc_def==NULL) + fill_fopen_filefunc(&ziinit.z_filefunc); + else + ziinit.z_filefunc = *pzlib_filefunc_def; + + ziinit.filestream = (*(ziinit.z_filefunc.zopen_file)) + (ziinit.z_filefunc.opaque, + pathname, + (append == APPEND_STATUS_CREATE) ?
+ (ZLIB_FILEFUNC_MODE_READ | ZLIB_FILEFUNC_MODE_WRITE | ZLIB_FILEFUNC_MODE_CREATE) : + (ZLIB_FILEFUNC_MODE_READ | ZLIB_FILEFUNC_MODE_WRITE | ZLIB_FILEFUNC_MODE_EXISTING)); + + if (ziinit.filestream == NULL) + return NULL; + ziinit.begin_pos = ZTELL(ziinit.z_filefunc,ziinit.filestream); + ziinit.in_opened_file_inzip = 0; + ziinit.ci.stream_initialised = 0; + ziinit.number_entry = 0; + ziinit.add_position_when_writting_offset = 0; + init_linkedlist(&(ziinit.central_dir)); + + + zi = (zip_internal*)ALLOC(sizeof(zip_internal)); + if (zi==NULL) + { + ZCLOSE(ziinit.z_filefunc,ziinit.filestream); + return NULL; + } + + /* now we add file in a zipfile */ +# ifndef NO_ADDFILEINEXISTINGZIP + ziinit.globalcomment = NULL; + if (append == APPEND_STATUS_ADDINZIP) + { + uLong byte_before_the_zipfile;/* byte before the zipfile, (>0 for sfx)*/ + + uLong size_central_dir; /* size of the central directory */ + uLong offset_central_dir; /* offset of start of central directory */ + uLong central_pos,uL; + + uLong number_disk; /* number of the current dist, used for + spaning ZIP, unsupported, always 0*/ + uLong number_disk_with_CD; /* number the the disk with central dir, used + for spaning ZIP, unsupported, always 0*/ + uLong number_entry; + uLong number_entry_CD; /* total number of entries in + the central dir + (same than number_entry on nospan) */ + uLong size_comment; + + central_pos = ziplocal_SearchCentralDir(&ziinit.z_filefunc,ziinit.filestream); + if (central_pos==0) + err=ZIP_ERRNO; + + if (ZSEEK(ziinit.z_filefunc, ziinit.filestream, + central_pos,ZLIB_FILEFUNC_SEEK_SET)!=0) + err=ZIP_ERRNO; + + /* the signature, already checked */ + if (ziplocal_getLong(&ziinit.z_filefunc, ziinit.filestream,&uL)!=ZIP_OK) + err=ZIP_ERRNO; + + /* number of this disk */ + if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&number_disk)!=ZIP_OK) + err=ZIP_ERRNO; + + /* number of the disk with the start of the central directory */ + if (ziplocal_getShort(&ziinit.z_filefunc, 
ziinit.filestream,&number_disk_with_CD)!=ZIP_OK) + err=ZIP_ERRNO; + + /* total number of entries in the central dir on this disk */ + if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&number_entry)!=ZIP_OK) + err=ZIP_ERRNO; + + /* total number of entries in the central dir */ + if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&number_entry_CD)!=ZIP_OK) + err=ZIP_ERRNO; + + if ((number_entry_CD!=number_entry) || + (number_disk_with_CD!=0) || + (number_disk!=0)) + err=ZIP_BADZIPFILE; + + /* size of the central directory */ + if (ziplocal_getLong(&ziinit.z_filefunc, ziinit.filestream,&size_central_dir)!=ZIP_OK) + err=ZIP_ERRNO; + + /* offset of start of central directory with respect to the + starting disk number */ + if (ziplocal_getLong(&ziinit.z_filefunc, ziinit.filestream,&offset_central_dir)!=ZIP_OK) + err=ZIP_ERRNO; + + /* zipfile global comment length */ + if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&size_comment)!=ZIP_OK) + err=ZIP_ERRNO; + + if ((central_pos<offset_central_dir+size_central_dir) && + (err==ZIP_OK)) + err=ZIP_BADZIPFILE; + + if (err!=ZIP_OK) + { + ZCLOSE(ziinit.z_filefunc, ziinit.filestream); + return NULL; + } + + if (size_comment>0) + { + ziinit.globalcomment = ALLOC(size_comment+1); + if (ziinit.globalcomment) + { + size_comment = ZREAD(ziinit.z_filefunc, ziinit.filestream,ziinit.globalcomment,size_comment); + ziinit.globalcomment[size_comment]=0; + } + } + + byte_before_the_zipfile = central_pos - + (offset_central_dir+size_central_dir); + ziinit.add_position_when_writting_offset = byte_before_the_zipfile; + + { + uLong size_central_dir_to_read = size_central_dir; + size_t buf_size = SIZEDATA_INDATABLOCK; + void* buf_read = (void*)ALLOC(buf_size); + if (ZSEEK(ziinit.z_filefunc, ziinit.filestream, + offset_central_dir + byte_before_the_zipfile, + ZLIB_FILEFUNC_SEEK_SET) != 0) + err=ZIP_ERRNO; + + while ((size_central_dir_to_read>0) && (err==ZIP_OK)) + { + uLong read_this = SIZEDATA_INDATABLOCK; + if (read_this > size_central_dir_to_read) + read_this = size_central_dir_to_read; + if (ZREAD(ziinit.z_filefunc, ziinit.filestream,buf_read,read_this) != read_this) + err=ZIP_ERRNO; + + if
(err==ZIP_OK) + err = add_data_in_datablock(&ziinit.central_dir,buf_read, + (uLong)read_this); + size_central_dir_to_read-=read_this; + } + TRYFREE(buf_read); + } + ziinit.begin_pos = byte_before_the_zipfile; + ziinit.number_entry = number_entry_CD; + + if (ZSEEK(ziinit.z_filefunc, ziinit.filestream, + offset_central_dir+byte_before_the_zipfile,ZLIB_FILEFUNC_SEEK_SET)!=0) + err=ZIP_ERRNO; + } + + if (globalcomment) + { + *globalcomment = ziinit.globalcomment; + } +# endif /* !NO_ADDFILEINEXISTINGZIP*/ + + if (err != ZIP_OK) + { +# ifndef NO_ADDFILEINEXISTINGZIP + TRYFREE(ziinit.globalcomment); +# endif /* !NO_ADDFILEINEXISTINGZIP*/ + TRYFREE(zi); + return NULL; + } + else + { + *zi = ziinit; + return (zipFile)zi; + } +} + +extern zipFile ZEXPORT zipOpen (pathname, append) + const char *pathname; + int append; +{ + return zipOpen2(pathname,append,NULL,NULL); +} + +extern int ZEXPORT zipOpenNewFileInZip3 (file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, raw, + windowBits, memLevel, strategy, + password, crcForCrypting) + zipFile file; + const char* filename; + const zip_fileinfo* zipfi; + const void* extrafield_local; + uInt size_extrafield_local; + const void* extrafield_global; + uInt size_extrafield_global; + const char* comment; + int method; + int level; + int raw; + int windowBits; + int memLevel; + int strategy; + const char* password; + uLong crcForCrypting; +{ + zip_internal* zi; + uInt size_filename; + uInt size_comment; + uInt i; + int err = ZIP_OK; + +# ifdef NOCRYPT + if (password != NULL) + return ZIP_PARAMERROR; +# endif + + if (file == NULL) + return ZIP_PARAMERROR; + if ((method!=0) && (method!=Z_DEFLATED)) + return ZIP_PARAMERROR; + + zi = (zip_internal*)file; + + if (zi->in_opened_file_inzip == 1) + { + err = zipCloseFileInZip (file); + if (err != ZIP_OK) + return err; + } + + + if (filename==NULL) + filename="-"; + + if (comment==NULL) + size_comment = 0; + 
else + size_comment = (uInt)strlen(comment); + + size_filename = (uInt)strlen(filename); + + if (zipfi == NULL) + zi->ci.dosDate = 0; + else + { + if (zipfi->dosDate != 0) + zi->ci.dosDate = zipfi->dosDate; + else zi->ci.dosDate = ziplocal_TmzDateToDosDate(&zipfi->tmz_date,zipfi->dosDate); + } + + zi->ci.flag = 0; + if ((level==8) || (level==9)) + zi->ci.flag |= 2; + if ((level==2)) + zi->ci.flag |= 4; + if ((level==1)) + zi->ci.flag |= 6; + if (password != NULL) + zi->ci.flag |= 1; + + zi->ci.crc32 = 0; + zi->ci.method = method; + zi->ci.encrypt = 0; + zi->ci.stream_initialised = 0; + zi->ci.pos_in_buffered_data = 0; + zi->ci.raw = raw; + zi->ci.pos_local_header = ZTELL(zi->z_filefunc,zi->filestream) ; + zi->ci.size_centralheader = SIZECENTRALHEADER + size_filename + + size_extrafield_global + size_comment; + zi->ci.central_header = (char*)ALLOC((uInt)zi->ci.size_centralheader); + + ziplocal_putValue_inmemory(zi->ci.central_header,(uLong)CENTRALHEADERMAGIC,4); + /* version info */ + ziplocal_putValue_inmemory(zi->ci.central_header+4,(uLong)VERSIONMADEBY,2); + ziplocal_putValue_inmemory(zi->ci.central_header+6,(uLong)20,2); + ziplocal_putValue_inmemory(zi->ci.central_header+8,(uLong)zi->ci.flag,2); + ziplocal_putValue_inmemory(zi->ci.central_header+10,(uLong)zi->ci.method,2); + ziplocal_putValue_inmemory(zi->ci.central_header+12,(uLong)zi->ci.dosDate,4); + ziplocal_putValue_inmemory(zi->ci.central_header+16,(uLong)0,4); /*crc*/ + ziplocal_putValue_inmemory(zi->ci.central_header+20,(uLong)0,4); /*compr size*/ + ziplocal_putValue_inmemory(zi->ci.central_header+24,(uLong)0,4); /*uncompr size*/ + ziplocal_putValue_inmemory(zi->ci.central_header+28,(uLong)size_filename,2); + ziplocal_putValue_inmemory(zi->ci.central_header+30,(uLong)size_extrafield_global,2); + ziplocal_putValue_inmemory(zi->ci.central_header+32,(uLong)size_comment,2); + ziplocal_putValue_inmemory(zi->ci.central_header+34,(uLong)0,2); /*disk nm start*/ + + if (zipfi==NULL) + 
ziplocal_putValue_inmemory(zi->ci.central_header+36,(uLong)0,2); + else + ziplocal_putValue_inmemory(zi->ci.central_header+36,(uLong)zipfi->internal_fa,2); + + if (zipfi==NULL) + ziplocal_putValue_inmemory(zi->ci.central_header+38,(uLong)0,4); + else + ziplocal_putValue_inmemory(zi->ci.central_header+38,(uLong)zipfi->external_fa,4); + + ziplocal_putValue_inmemory(zi->ci.central_header+42,(uLong)zi->ci.pos_local_header- zi->add_position_when_writting_offset,4); + + for (i=0;ici.central_header+SIZECENTRALHEADER+i) = *(filename+i); + + for (i=0;ici.central_header+SIZECENTRALHEADER+size_filename+i) = + *(((const char*)extrafield_global)+i); + + for (i=0;ici.central_header+SIZECENTRALHEADER+size_filename+ + size_extrafield_global+i) = *(comment+i); + if (zi->ci.central_header == NULL) + return ZIP_INTERNALERROR; + + /* write the local header */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)LOCALHEADERMAGIC,4); + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)20,2);/* version needed to extract */ + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.flag,2); + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.method,2); + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.dosDate,4); + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* crc 32, unknown */ + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* compressed size, unknown */ + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* uncompressed size, unknown */ + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_filename,2); + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_extrafield_local,2); + + if ((err==ZIP_OK) && (size_filename>0)) + if 
(ZWRITE(zi->z_filefunc,zi->filestream,filename,size_filename)!=size_filename) + err = ZIP_ERRNO; + + if ((err==ZIP_OK) && (size_extrafield_local>0)) + if (ZWRITE(zi->z_filefunc,zi->filestream,extrafield_local,size_extrafield_local) + !=size_extrafield_local) + err = ZIP_ERRNO; + + zi->ci.stream.avail_in = (uInt)0; + zi->ci.stream.avail_out = (uInt)Z_BUFSIZE; + zi->ci.stream.next_out = zi->ci.buffered_data; + zi->ci.stream.total_in = 0; + zi->ci.stream.total_out = 0; + + if ((err==ZIP_OK) && (zi->ci.method == Z_DEFLATED) && (!zi->ci.raw)) + { + zi->ci.stream.zalloc = (alloc_func)0; + zi->ci.stream.zfree = (free_func)0; + zi->ci.stream.opaque = (voidpf)0; + + if (windowBits>0) + windowBits = -windowBits; + + err = deflateInit2(&zi->ci.stream, level, + Z_DEFLATED, windowBits, memLevel, strategy); + + if (err==Z_OK) + zi->ci.stream_initialised = 1; + } +# ifndef NOCRYPT + zi->ci.crypt_header_size = 0; + if ((err==Z_OK) && (password != NULL)) + { + unsigned char bufHead[RAND_HEAD_LEN]; + unsigned int sizeHead; + zi->ci.encrypt = 1; + zi->ci.pcrc_32_tab = get_crc_table(); + /*init_keys(password,zi->ci.keys,zi->ci.pcrc_32_tab);*/ + + sizeHead=crypthead(password,bufHead,RAND_HEAD_LEN,zi->ci.keys,zi->ci.pcrc_32_tab,crcForCrypting); + zi->ci.crypt_header_size = sizeHead; + + if (ZWRITE(zi->z_filefunc,zi->filestream,bufHead,sizeHead) != sizeHead) + err = ZIP_ERRNO; + } +# endif + + if (err==Z_OK) + zi->in_opened_file_inzip = 1; + return err; +} + +extern int ZEXPORT zipOpenNewFileInZip2(file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, raw) + zipFile file; + const char* filename; + const zip_fileinfo* zipfi; + const void* extrafield_local; + uInt size_extrafield_local; + const void* extrafield_global; + uInt size_extrafield_global; + const char* comment; + int method; + int level; + int raw; +{ + return zipOpenNewFileInZip3 (file, filename, zipfi, + extrafield_local, size_extrafield_local, 
+ extrafield_global, size_extrafield_global, + comment, method, level, raw, + -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, + NULL, 0); +} + +extern int ZEXPORT zipOpenNewFileInZip (file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level) + zipFile file; + const char* filename; + const zip_fileinfo* zipfi; + const void* extrafield_local; + uInt size_extrafield_local; + const void* extrafield_global; + uInt size_extrafield_global; + const char* comment; + int method; + int level; +{ + return zipOpenNewFileInZip2 (file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, 0); +} + +local int zipFlushWriteBuffer(zi) + zip_internal* zi; +{ + int err=ZIP_OK; + + if (zi->ci.encrypt != 0) + { +#ifndef NOCRYPT + uInt i; + int t; + for (i=0;ici.pos_in_buffered_data;i++) + zi->ci.buffered_data[i] = zencode(zi->ci.keys, zi->ci.pcrc_32_tab, + zi->ci.buffered_data[i],t); +#endif + } + if (ZWRITE(zi->z_filefunc,zi->filestream,zi->ci.buffered_data,zi->ci.pos_in_buffered_data) + !=zi->ci.pos_in_buffered_data) + err = ZIP_ERRNO; + zi->ci.pos_in_buffered_data = 0; + return err; +} + +extern int ZEXPORT zipWriteInFileInZip (file, buf, len) + zipFile file; + const void* buf; + unsigned len; +{ + zip_internal* zi; + int err=ZIP_OK; + + if (file == NULL) + return ZIP_PARAMERROR; + zi = (zip_internal*)file; + + if (zi->in_opened_file_inzip == 0) + return ZIP_PARAMERROR; + + zi->ci.stream.next_in = (void*)buf; + zi->ci.stream.avail_in = len; + zi->ci.crc32 = crc32(zi->ci.crc32,buf,len); + + while ((err==ZIP_OK) && (zi->ci.stream.avail_in>0)) + { + if (zi->ci.stream.avail_out == 0) + { + if (zipFlushWriteBuffer(zi) == ZIP_ERRNO) + err = ZIP_ERRNO; + zi->ci.stream.avail_out = (uInt)Z_BUFSIZE; + zi->ci.stream.next_out = zi->ci.buffered_data; + } + + + if(err != ZIP_OK) + break; + + if ((zi->ci.method == Z_DEFLATED) && (!zi->ci.raw)) + 
{ + uLong uTotalOutBefore = zi->ci.stream.total_out; + err=deflate(&zi->ci.stream, Z_NO_FLUSH); + zi->ci.pos_in_buffered_data += (uInt)(zi->ci.stream.total_out - uTotalOutBefore) ; + + } + else + { + uInt copy_this,i; + if (zi->ci.stream.avail_in < zi->ci.stream.avail_out) + copy_this = zi->ci.stream.avail_in; + else + copy_this = zi->ci.stream.avail_out; + for (i=0;ici.stream.next_out)+i) = + *(((const char*)zi->ci.stream.next_in)+i); + { + zi->ci.stream.avail_in -= copy_this; + zi->ci.stream.avail_out-= copy_this; + zi->ci.stream.next_in+= copy_this; + zi->ci.stream.next_out+= copy_this; + zi->ci.stream.total_in+= copy_this; + zi->ci.stream.total_out+= copy_this; + zi->ci.pos_in_buffered_data += copy_this; + } + } + } + + return err; +} + +extern int ZEXPORT zipCloseFileInZipRaw (file, uncompressed_size, crc32) + zipFile file; + uLong uncompressed_size; + uLong crc32; +{ + zip_internal* zi; + uLong compressed_size; + int err=ZIP_OK; + + if (file == NULL) + return ZIP_PARAMERROR; + zi = (zip_internal*)file; + + if (zi->in_opened_file_inzip == 0) + return ZIP_PARAMERROR; + zi->ci.stream.avail_in = 0; + + if ((zi->ci.method == Z_DEFLATED) && (!zi->ci.raw)) + while (err==ZIP_OK) + { + uLong uTotalOutBefore; + if (zi->ci.stream.avail_out == 0) + { + if (zipFlushWriteBuffer(zi) == ZIP_ERRNO) + err = ZIP_ERRNO; + zi->ci.stream.avail_out = (uInt)Z_BUFSIZE; + zi->ci.stream.next_out = zi->ci.buffered_data; + } + uTotalOutBefore = zi->ci.stream.total_out; + err=deflate(&zi->ci.stream, Z_FINISH); + zi->ci.pos_in_buffered_data += (uInt)(zi->ci.stream.total_out - uTotalOutBefore) ; + } + + if (err==Z_STREAM_END) + err=ZIP_OK; /* this is normal */ + + if ((zi->ci.pos_in_buffered_data>0) && (err==ZIP_OK)) + if (zipFlushWriteBuffer(zi)==ZIP_ERRNO) + err = ZIP_ERRNO; + + if ((zi->ci.method == Z_DEFLATED) && (!zi->ci.raw)) + { + err=deflateEnd(&zi->ci.stream); + zi->ci.stream_initialised = 0; + } + + if (!zi->ci.raw) + { + crc32 = (uLong)zi->ci.crc32; + uncompressed_size = 
(uLong)zi->ci.stream.total_in; + } + compressed_size = (uLong)zi->ci.stream.total_out; +# ifndef NOCRYPT + compressed_size += zi->ci.crypt_header_size; +# endif + + ziplocal_putValue_inmemory(zi->ci.central_header+16,crc32,4); /*crc*/ + ziplocal_putValue_inmemory(zi->ci.central_header+20, + compressed_size,4); /*compr size*/ + if (zi->ci.stream.data_type == Z_ASCII) + ziplocal_putValue_inmemory(zi->ci.central_header+36,(uLong)Z_ASCII,2); + ziplocal_putValue_inmemory(zi->ci.central_header+24, + uncompressed_size,4); /*uncompr size*/ + + if (err==ZIP_OK) + err = add_data_in_datablock(&zi->central_dir,zi->ci.central_header, + (uLong)zi->ci.size_centralheader); + free(zi->ci.central_header); + + if (err==ZIP_OK) + { + long cur_pos_inzip = ZTELL(zi->z_filefunc,zi->filestream); + if (ZSEEK(zi->z_filefunc,zi->filestream, + zi->ci.pos_local_header + 14,ZLIB_FILEFUNC_SEEK_SET)!=0) + err = ZIP_ERRNO; + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,crc32,4); /* crc 32, unknown */ + + if (err==ZIP_OK) /* compressed size, unknown */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,compressed_size,4); + + if (err==ZIP_OK) /* uncompressed size, unknown */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,uncompressed_size,4); + + if (ZSEEK(zi->z_filefunc,zi->filestream, + cur_pos_inzip,ZLIB_FILEFUNC_SEEK_SET)!=0) + err = ZIP_ERRNO; + } + + zi->number_entry ++; + zi->in_opened_file_inzip = 0; + + return err; +} + +extern int ZEXPORT zipCloseFileInZip (file) + zipFile file; +{ + return zipCloseFileInZipRaw (file,0,0); +} + +extern int ZEXPORT zipClose (file, global_comment) + zipFile file; + const char* global_comment; +{ + zip_internal* zi; + int err = 0; + uLong size_centraldir = 0; + uLong centraldir_pos_inzip; + uInt size_global_comment; + if (file == NULL) + return ZIP_PARAMERROR; + zi = (zip_internal*)file; + + if (zi->in_opened_file_inzip == 1) + { + err = zipCloseFileInZip (file); + } + +#ifndef NO_ADDFILEINEXISTINGZIP + if 
(global_comment==NULL) + global_comment = zi->globalcomment; +#endif + if (global_comment==NULL) + size_global_comment = 0; + else + size_global_comment = (uInt)strlen(global_comment); + + centraldir_pos_inzip = ZTELL(zi->z_filefunc,zi->filestream); + if (err==ZIP_OK) + { + linkedlist_datablock_internal* ldi = zi->central_dir.first_block ; + while (ldi!=NULL) + { + if ((err==ZIP_OK) && (ldi->filled_in_this_block>0)) + if (ZWRITE(zi->z_filefunc,zi->filestream, + ldi->data,ldi->filled_in_this_block) + !=ldi->filled_in_this_block ) + err = ZIP_ERRNO; + + size_centraldir += ldi->filled_in_this_block; + ldi = ldi->next_datablock; + } + } + free_datablock(zi->central_dir.first_block); + + if (err==ZIP_OK) /* Magic End */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)ENDHEADERMAGIC,4); + + if (err==ZIP_OK) /* number of this disk */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,2); + + if (err==ZIP_OK) /* number of the disk with the start of the central directory */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,2); + + if (err==ZIP_OK) /* total number of entries in the central dir on this disk */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->number_entry,2); + + if (err==ZIP_OK) /* total number of entries in the central dir */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->number_entry,2); + + if (err==ZIP_OK) /* size of the central directory */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_centraldir,4); + + if (err==ZIP_OK) /* offset of start of central directory with respect to the + starting disk number */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream, + (uLong)(centraldir_pos_inzip - zi->add_position_when_writting_offset),4); + + if (err==ZIP_OK) /* zipfile comment length */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_global_comment,2); + + if ((err==ZIP_OK) && (size_global_comment>0)) + if 
(ZWRITE(zi->z_filefunc,zi->filestream, + global_comment,size_global_comment) != size_global_comment) + err = ZIP_ERRNO; + + if (ZCLOSE(zi->z_filefunc,zi->filestream) != 0) + if (err == ZIP_OK) + err = ZIP_ERRNO; + +#ifndef NO_ADDFILEINEXISTINGZIP + TRYFREE(zi->globalcomment); +#endif + TRYFREE(zi); + + return err; +} Added: external/zlib/contrib/minizip/zip.h ============================================================================== --- (empty file) +++ external/zlib/contrib/minizip/zip.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,235 @@ +/* zip.h -- IO for compress .zip files using zlib + Version 1.01e, February 12th, 2005 + + Copyright (C) 1998-2005 Gilles Vollant + + This unzip package allow creates .ZIP file, compatible with PKZip 2.04g + WinZip, InfoZip tools and compatible. + Multi volume ZipFile (span) are not supported. + Encryption compatible with pkzip 2.04g only supported + Old compressions used by old PKZip 1.x are not supported + + For uncompress .zip file, look at unzip.h + + + I WAIT FEEDBACK at mail info at winimage.com + Visit also http://www.winimage.com/zLibDll/unzip.html for evolution + + Condition of use and distribution are the same than zlib : + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. 
This notice may not be removed or altered from any source distribution. + + +*/ + +/* for more info about .ZIP format, see + http://www.info-zip.org/pub/infozip/doc/appnote-981119-iz.zip + http://www.info-zip.org/pub/infozip/doc/ + PkWare has also a specification at : + ftp://ftp.pkware.com/probdesc.zip +*/ + +#ifndef _zip_H +#define _zip_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ZLIB_H +#include "zlib.h" +#endif + +#ifndef _ZLIBIOAPI_H +#include "ioapi.h" +#endif + +#if defined(STRICTZIP) || defined(STRICTZIPUNZIP) +/* like the STRICT of WIN32, we define a pointer that cannot be converted + from (void*) without cast */ +typedef struct TagzipFile__ { int unused; } zipFile__; +typedef zipFile__ *zipFile; +#else +typedef voidp zipFile; +#endif + +#define ZIP_OK (0) +#define ZIP_EOF (0) +#define ZIP_ERRNO (Z_ERRNO) +#define ZIP_PARAMERROR (-102) +#define ZIP_BADZIPFILE (-103) +#define ZIP_INTERNALERROR (-104) + +#ifndef DEF_MEM_LEVEL +# if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +# else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +# endif +#endif +/* default memLevel */ + +/* tm_zip contain date/time info */ +typedef struct tm_zip_s +{ + uInt tm_sec; /* seconds after the minute - [0,59] */ + uInt tm_min; /* minutes after the hour - [0,59] */ + uInt tm_hour; /* hours since midnight - [0,23] */ + uInt tm_mday; /* day of the month - [1,31] */ + uInt tm_mon; /* months since January - [0,11] */ + uInt tm_year; /* years - [1980..2044] */ +} tm_zip; + +typedef struct +{ + tm_zip tmz_date; /* date in understandable format */ + uLong dosDate; /* if dos_date == 0, tmu_date is used */ +/* uLong flag; */ /* general purpose bit flag 2 bytes */ + + uLong internal_fa; /* internal file attributes 2 bytes */ + uLong external_fa; /* external file attributes 4 bytes */ +} zip_fileinfo; + +typedef const char* zipcharpc; + + +#define APPEND_STATUS_CREATE (0) +#define APPEND_STATUS_CREATEAFTER (1) +#define APPEND_STATUS_ADDINZIP (2) + +extern zipFile ZEXPORT zipOpen 
OF((const char *pathname, int append)); +/* + Create a zipfile. + pathname contain on Windows XP a filename like "c:\\zlib\\zlib113.zip" or on + an Unix computer "zlib/zlib113.zip". + if the file pathname exist and append==APPEND_STATUS_CREATEAFTER, the zip + will be created at the end of the file. + (useful if the file contain a self extractor code) + if the file pathname exist and append==APPEND_STATUS_ADDINZIP, we will + add files in existing zip (be sure you don't add file that doesn't exist) + If the zipfile cannot be opened, the return value is NULL. + Else, the return value is a zipFile Handle, usable with other function + of this zip package. +*/ + +/* Note : there is no delete function into a zipfile. + If you want delete file into a zipfile, you must open a zipfile, and create another + Of couse, you can use RAW reading and writing to copy the file you did not want delte +*/ + +extern zipFile ZEXPORT zipOpen2 OF((const char *pathname, + int append, + zipcharpc* globalcomment, + zlib_filefunc_def* pzlib_filefunc_def)); + +extern int ZEXPORT zipOpenNewFileInZip OF((zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level)); +/* + Open a file in the ZIP for writing. 
+ filename : the filename in zip (if NULL, '-' without quote will be used + *zipfi contain supplemental information + if extrafield_local!=NULL and size_extrafield_local>0, extrafield_local + contains the extrafield data the the local header + if extrafield_global!=NULL and size_extrafield_global>0, extrafield_global + contains the extrafield data the the local header + if comment != NULL, comment contain the comment string + method contain the compression method (0 for store, Z_DEFLATED for deflate) + level contain the level of compression (can be Z_DEFAULT_COMPRESSION) +*/ + + +extern int ZEXPORT zipOpenNewFileInZip2 OF((zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level, + int raw)); + +/* + Same than zipOpenNewFileInZip, except if raw=1, we write raw file + */ + +extern int ZEXPORT zipOpenNewFileInZip3 OF((zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level, + int raw, + int windowBits, + int memLevel, + int strategy, + const char* password, + uLong crcForCtypting)); + +/* + Same than zipOpenNewFileInZip2, except + windowBits,memLevel,,strategy : see parameter strategy in deflateInit2 + password : crypting password (NULL for no crypting) + crcForCtypting : crc of file to compress (needed for crypting) + */ + + +extern int ZEXPORT zipWriteInFileInZip OF((zipFile file, + const void* buf, + unsigned len)); +/* + Write data in the zipfile +*/ + +extern int ZEXPORT zipCloseFileInZip OF((zipFile file)); +/* + Close the current file in the zipfile +*/ + +extern int ZEXPORT zipCloseFileInZipRaw OF((zipFile file, + uLong uncompressed_size, + uLong crc32)); +/* + Close the current file in the 
zipfile, for fiel opened with + parameter raw=1 in zipOpenNewFileInZip2 + uncompressed_size and crc32 are value for the uncompressed size +*/ + +extern int ZEXPORT zipClose OF((zipFile file, + const char* global_comment)); +/* + Close the zipfile +*/ + +#ifdef __cplusplus +} +#endif + +#endif /* _zip_H */ Added: external/zlib/contrib/pascal/example.pas ============================================================================== --- (empty file) +++ external/zlib/contrib/pascal/example.pas Tue Jan 3 07:42:59 2006 @@ -0,0 +1,599 @@ +(* example.c -- usage example of the zlib compression library + * Copyright (C) 1995-2003 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Pascal translation + * Copyright (C) 1998 by Jacques Nomssi Nzali. + * For conditions of distribution and use, see copyright notice in readme.txt + * + * Adaptation to the zlibpas interface + * Copyright (C) 2003 by Cosmin Truta. + * For conditions of distribution and use, see copyright notice in readme.txt + *) + +program example; + +{$DEFINE TEST_COMPRESS} +{DO NOT $DEFINE TEST_GZIO} +{$DEFINE TEST_DEFLATE} +{$DEFINE TEST_INFLATE} +{$DEFINE TEST_FLUSH} +{$DEFINE TEST_SYNC} +{$DEFINE TEST_DICT} + +uses SysUtils, zlibpas; + +const TESTFILE = 'foo.gz'; + +(* "hello world" would be more standard, but the repeated "hello" + * stresses the compression code better, sorry... 
+ *) +const hello: PChar = 'hello, hello!'; + +const dictionary: PChar = 'hello'; + +var dictId: LongInt; (* Adler32 value of the dictionary *) + +procedure CHECK_ERR(err: Integer; msg: String); +begin + if err <> Z_OK then + begin + WriteLn(msg, ' error: ', err); + Halt(1); + end; +end; + +procedure EXIT_ERR(const msg: String); +begin + WriteLn('Error: ', msg); + Halt(1); +end; + +(* =========================================================================== + * Test compress and uncompress + *) +{$IFDEF TEST_COMPRESS} +procedure test_compress(compr: Pointer; comprLen: LongInt; + uncompr: Pointer; uncomprLen: LongInt); +var err: Integer; + len: LongInt; +begin + len := StrLen(hello)+1; + + err := compress(compr, comprLen, hello, len); + CHECK_ERR(err, 'compress'); + + StrCopy(PChar(uncompr), 'garbage'); + + err := uncompress(uncompr, uncomprLen, compr, comprLen); + CHECK_ERR(err, 'uncompress'); + + if StrComp(PChar(uncompr), hello) <> 0 then + EXIT_ERR('bad uncompress') + else + WriteLn('uncompress(): ', PChar(uncompr)); +end; +{$ENDIF} + +(* =========================================================================== + * Test read/write of .gz files + *) +{$IFDEF TEST_GZIO} +procedure test_gzio(const fname: PChar; (* compressed file name *) + uncompr: Pointer; + uncomprLen: LongInt); +var err: Integer; + len: Integer; + zfile: gzFile; + pos: LongInt; +begin + len := StrLen(hello)+1; + + zfile := gzopen(fname, 'wb'); + if zfile = NIL then + begin + WriteLn('gzopen error'); + Halt(1); + end; + gzputc(zfile, 'h'); + if gzputs(zfile, 'ello') <> 4 then + begin + WriteLn('gzputs err: ', gzerror(zfile, err)); + Halt(1); + end; + {$IFDEF GZ_FORMAT_STRING} + if gzprintf(zfile, ', %s!', 'hello') <> 8 then + begin + WriteLn('gzprintf err: ', gzerror(zfile, err)); + Halt(1); + end; + {$ELSE} + if gzputs(zfile, ', hello!') <> 8 then + begin + WriteLn('gzputs err: ', gzerror(zfile, err)); + Halt(1); + end; + {$ENDIF} + gzseek(zfile, 1, SEEK_CUR); (* add one zero byte *) + 
gzclose(zfile); + + zfile := gzopen(fname, 'rb'); + if zfile = NIL then + begin + WriteLn('gzopen error'); + Halt(1); + end; + + StrCopy(PChar(uncompr), 'garbage'); + + if gzread(zfile, uncompr, uncomprLen) <> len then + begin + WriteLn('gzread err: ', gzerror(zfile, err)); + Halt(1); + end; + if StrComp(PChar(uncompr), hello) <> 0 then + begin + WriteLn('bad gzread: ', PChar(uncompr)); + Halt(1); + end + else + WriteLn('gzread(): ', PChar(uncompr)); + + pos := gzseek(zfile, -8, SEEK_CUR); + if (pos <> 6) or (gztell(zfile) <> pos) then + begin + WriteLn('gzseek error, pos=', pos, ', gztell=', gztell(zfile)); + Halt(1); + end; + + if gzgetc(zfile) <> ' ' then + begin + WriteLn('gzgetc error'); + Halt(1); + end; + + if gzungetc(' ', zfile) <> ' ' then + begin + WriteLn('gzungetc error'); + Halt(1); + end; + + gzgets(zfile, PChar(uncompr), uncomprLen); + uncomprLen := StrLen(PChar(uncompr)); + if uncomprLen <> 7 then (* " hello!" *) + begin + WriteLn('gzgets err after gzseek: ', gzerror(zfile, err)); + Halt(1); + end; + if StrComp(PChar(uncompr), hello + 6) <> 0 then + begin + WriteLn('bad gzgets after gzseek'); + Halt(1); + end + else + WriteLn('gzgets() after gzseek: ', PChar(uncompr)); + + gzclose(zfile); +end; +{$ENDIF} + +(* =========================================================================== + * Test deflate with small buffers + *) +{$IFDEF TEST_DEFLATE} +procedure test_deflate(compr: Pointer; comprLen: LongInt); +var c_stream: z_stream; (* compression stream *) + err: Integer; + len: LongInt; +begin + len := StrLen(hello)+1; + + c_stream.zalloc := NIL; + c_stream.zfree := NIL; + c_stream.opaque := NIL; + + err := deflateInit(c_stream, Z_DEFAULT_COMPRESSION); + CHECK_ERR(err, 'deflateInit'); + + c_stream.next_in := hello; + c_stream.next_out := compr; + + while (c_stream.total_in <> len) and + (c_stream.total_out < comprLen) do + begin + c_stream.avail_out := 1; { force small buffers } + c_stream.avail_in := 1; + err := deflate(c_stream, Z_NO_FLUSH); + 
CHECK_ERR(err, 'deflate'); + end; + + (* Finish the stream, still forcing small buffers: *) + while TRUE do + begin + c_stream.avail_out := 1; + err := deflate(c_stream, Z_FINISH); + if err = Z_STREAM_END then + break; + CHECK_ERR(err, 'deflate'); + end; + + err := deflateEnd(c_stream); + CHECK_ERR(err, 'deflateEnd'); +end; +{$ENDIF} + +(* =========================================================================== + * Test inflate with small buffers + *) +{$IFDEF TEST_INFLATE} +procedure test_inflate(compr: Pointer; comprLen : LongInt; + uncompr: Pointer; uncomprLen : LongInt); +var err: Integer; + d_stream: z_stream; (* decompression stream *) +begin + StrCopy(PChar(uncompr), 'garbage'); + + d_stream.zalloc := NIL; + d_stream.zfree := NIL; + d_stream.opaque := NIL; + + d_stream.next_in := compr; + d_stream.avail_in := 0; + d_stream.next_out := uncompr; + + err := inflateInit(d_stream); + CHECK_ERR(err, 'inflateInit'); + + while (d_stream.total_out < uncomprLen) and + (d_stream.total_in < comprLen) do + begin + d_stream.avail_out := 1; (* force small buffers *) + d_stream.avail_in := 1; + err := inflate(d_stream, Z_NO_FLUSH); + if err = Z_STREAM_END then + break; + CHECK_ERR(err, 'inflate'); + end; + + err := inflateEnd(d_stream); + CHECK_ERR(err, 'inflateEnd'); + + if StrComp(PChar(uncompr), hello) <> 0 then + EXIT_ERR('bad inflate') + else + WriteLn('inflate(): ', PChar(uncompr)); +end; +{$ENDIF} + +(* =========================================================================== + * Test deflate with large buffers and dynamic change of compression level + *) +{$IFDEF TEST_DEFLATE} +procedure test_large_deflate(compr: Pointer; comprLen: LongInt; + uncompr: Pointer; uncomprLen: LongInt); +var c_stream: z_stream; (* compression stream *) + err: Integer; +begin + c_stream.zalloc := NIL; + c_stream.zfree := NIL; + c_stream.opaque := NIL; + + err := deflateInit(c_stream, Z_BEST_SPEED); + CHECK_ERR(err, 'deflateInit'); + + c_stream.next_out := compr; + c_stream.avail_out 
:= Integer(comprLen); + + (* At this point, uncompr is still mostly zeroes, so it should compress + * very well: + *) + c_stream.next_in := uncompr; + c_stream.avail_in := Integer(uncomprLen); + err := deflate(c_stream, Z_NO_FLUSH); + CHECK_ERR(err, 'deflate'); + if c_stream.avail_in <> 0 then + EXIT_ERR('deflate not greedy'); + + (* Feed in already compressed data and switch to no compression: *) + deflateParams(c_stream, Z_NO_COMPRESSION, Z_DEFAULT_STRATEGY); + c_stream.next_in := compr; + c_stream.avail_in := Integer(comprLen div 2); + err := deflate(c_stream, Z_NO_FLUSH); + CHECK_ERR(err, 'deflate'); + + (* Switch back to compressing mode: *) + deflateParams(c_stream, Z_BEST_COMPRESSION, Z_FILTERED); + c_stream.next_in := uncompr; + c_stream.avail_in := Integer(uncomprLen); + err := deflate(c_stream, Z_NO_FLUSH); + CHECK_ERR(err, 'deflate'); + + err := deflate(c_stream, Z_FINISH); + if err <> Z_STREAM_END then + EXIT_ERR('deflate should report Z_STREAM_END'); + + err := deflateEnd(c_stream); + CHECK_ERR(err, 'deflateEnd'); +end; +{$ENDIF} + +(* =========================================================================== + * Test inflate with large buffers + *) +{$IFDEF TEST_INFLATE} +procedure test_large_inflate(compr: Pointer; comprLen: LongInt; + uncompr: Pointer; uncomprLen: LongInt); +var err: Integer; + d_stream: z_stream; (* decompression stream *) +begin + StrCopy(PChar(uncompr), 'garbage'); + + d_stream.zalloc := NIL; + d_stream.zfree := NIL; + d_stream.opaque := NIL; + + d_stream.next_in := compr; + d_stream.avail_in := Integer(comprLen); + + err := inflateInit(d_stream); + CHECK_ERR(err, 'inflateInit'); + + while TRUE do + begin + d_stream.next_out := uncompr; (* discard the output *) + d_stream.avail_out := Integer(uncomprLen); + err := inflate(d_stream, Z_NO_FLUSH); + if err = Z_STREAM_END then + break; + CHECK_ERR(err, 'large inflate'); + end; + + err := inflateEnd(d_stream); + CHECK_ERR(err, 'inflateEnd'); + + if d_stream.total_out <> 2 * 
uncomprLen + comprLen div 2 then + begin + WriteLn('bad large inflate: ', d_stream.total_out); + Halt(1); + end + else + WriteLn('large_inflate(): OK'); +end; +{$ENDIF} + +(* =========================================================================== + * Test deflate with full flush + *) +{$IFDEF TEST_FLUSH} +procedure test_flush(compr: Pointer; var comprLen : LongInt); +var c_stream: z_stream; (* compression stream *) + err: Integer; + len: Integer; +begin + len := StrLen(hello)+1; + + c_stream.zalloc := NIL; + c_stream.zfree := NIL; + c_stream.opaque := NIL; + + err := deflateInit(c_stream, Z_DEFAULT_COMPRESSION); + CHECK_ERR(err, 'deflateInit'); + + c_stream.next_in := hello; + c_stream.next_out := compr; + c_stream.avail_in := 3; + c_stream.avail_out := Integer(comprLen); + err := deflate(c_stream, Z_FULL_FLUSH); + CHECK_ERR(err, 'deflate'); + + Inc(PByteArray(compr)^[3]); (* force an error in first compressed block *) + c_stream.avail_in := len - 3; + + err := deflate(c_stream, Z_FINISH); + if err <> Z_STREAM_END then + CHECK_ERR(err, 'deflate'); + + err := deflateEnd(c_stream); + CHECK_ERR(err, 'deflateEnd'); + + comprLen := c_stream.total_out; +end; +{$ENDIF} + +(* =========================================================================== + * Test inflateSync() + *) +{$IFDEF TEST_SYNC} +procedure test_sync(compr: Pointer; comprLen: LongInt; + uncompr: Pointer; uncomprLen : LongInt); +var err: Integer; + d_stream: z_stream; (* decompression stream *) +begin + StrCopy(PChar(uncompr), 'garbage'); + + d_stream.zalloc := NIL; + d_stream.zfree := NIL; + d_stream.opaque := NIL; + + d_stream.next_in := compr; + d_stream.avail_in := 2; (* just read the zlib header *) + + err := inflateInit(d_stream); + CHECK_ERR(err, 'inflateInit'); + + d_stream.next_out := uncompr; + d_stream.avail_out := Integer(uncomprLen); + + inflate(d_stream, Z_NO_FLUSH); + CHECK_ERR(err, 'inflate'); + + d_stream.avail_in := Integer(comprLen-2); (* read all compressed data *) + err := 
inflateSync(d_stream); (* but skip the damaged part *) + CHECK_ERR(err, 'inflateSync'); + + err := inflate(d_stream, Z_FINISH); + if err <> Z_DATA_ERROR then + EXIT_ERR('inflate should report DATA_ERROR'); + (* Because of incorrect adler32 *) + + err := inflateEnd(d_stream); + CHECK_ERR(err, 'inflateEnd'); + + WriteLn('after inflateSync(): hel', PChar(uncompr)); +end; +{$ENDIF} + +(* =========================================================================== + * Test deflate with preset dictionary + *) +{$IFDEF TEST_DICT} +procedure test_dict_deflate(compr: Pointer; comprLen: LongInt); +var c_stream: z_stream; (* compression stream *) + err: Integer; +begin + c_stream.zalloc := NIL; + c_stream.zfree := NIL; + c_stream.opaque := NIL; + + err := deflateInit(c_stream, Z_BEST_COMPRESSION); + CHECK_ERR(err, 'deflateInit'); + + err := deflateSetDictionary(c_stream, dictionary, StrLen(dictionary)); + CHECK_ERR(err, 'deflateSetDictionary'); + + dictId := c_stream.adler; + c_stream.next_out := compr; + c_stream.avail_out := Integer(comprLen); + + c_stream.next_in := hello; + c_stream.avail_in := StrLen(hello)+1; + + err := deflate(c_stream, Z_FINISH); + if err <> Z_STREAM_END then + EXIT_ERR('deflate should report Z_STREAM_END'); + + err := deflateEnd(c_stream); + CHECK_ERR(err, 'deflateEnd'); +end; +{$ENDIF} + +(* =========================================================================== + * Test inflate with a preset dictionary + *) +{$IFDEF TEST_DICT} +procedure test_dict_inflate(compr: Pointer; comprLen: LongInt; + uncompr: Pointer; uncomprLen: LongInt); +var err: Integer; + d_stream: z_stream; (* decompression stream *) +begin + StrCopy(PChar(uncompr), 'garbage'); + + d_stream.zalloc := NIL; + d_stream.zfree := NIL; + d_stream.opaque := NIL; + + d_stream.next_in := compr; + d_stream.avail_in := Integer(comprLen); + + err := inflateInit(d_stream); + CHECK_ERR(err, 'inflateInit'); + + d_stream.next_out := uncompr; + d_stream.avail_out := Integer(uncomprLen); + + while 
TRUE do + begin + err := inflate(d_stream, Z_NO_FLUSH); + if err = Z_STREAM_END then + break; + if err = Z_NEED_DICT then + begin + if d_stream.adler <> dictId then + EXIT_ERR('unexpected dictionary'); + err := inflateSetDictionary(d_stream, dictionary, StrLen(dictionary)); + end; + CHECK_ERR(err, 'inflate with dict'); + end; + + err := inflateEnd(d_stream); + CHECK_ERR(err, 'inflateEnd'); + + if StrComp(PChar(uncompr), hello) <> 0 then + EXIT_ERR('bad inflate with dict') + else + WriteLn('inflate with dictionary: ', PChar(uncompr)); +end; +{$ENDIF} + +var compr, uncompr: Pointer; + comprLen, uncomprLen: LongInt; + +begin + if zlibVersion^ <> ZLIB_VERSION[1] then + EXIT_ERR('Incompatible zlib version'); + + WriteLn('zlib version: ', zlibVersion); + WriteLn('zlib compile flags: ', Format('0x%x', [zlibCompileFlags])); + + comprLen := 10000 * SizeOf(Integer); (* don't overflow on MSDOS *) + uncomprLen := comprLen; + GetMem(compr, comprLen); + GetMem(uncompr, uncomprLen); + if (compr = NIL) or (uncompr = NIL) then + EXIT_ERR('Out of memory'); + (* compr and uncompr are cleared to avoid reading uninitialized + * data and to ensure that uncompr compresses well. 
+ *) + FillChar(compr^, comprLen, 0); + FillChar(uncompr^, uncomprLen, 0); + + {$IFDEF TEST_COMPRESS} + WriteLn('** Testing compress'); + test_compress(compr, comprLen, uncompr, uncomprLen); + {$ENDIF} + + {$IFDEF TEST_GZIO} + WriteLn('** Testing gzio'); + if ParamCount >= 1 then + test_gzio(ParamStr(1), uncompr, uncomprLen) + else + test_gzio(TESTFILE, uncompr, uncomprLen); + {$ENDIF} + + {$IFDEF TEST_DEFLATE} + WriteLn('** Testing deflate with small buffers'); + test_deflate(compr, comprLen); + {$ENDIF} + {$IFDEF TEST_INFLATE} + WriteLn('** Testing inflate with small buffers'); + test_inflate(compr, comprLen, uncompr, uncomprLen); + {$ENDIF} + + {$IFDEF TEST_DEFLATE} + WriteLn('** Testing deflate with large buffers'); + test_large_deflate(compr, comprLen, uncompr, uncomprLen); + {$ENDIF} + {$IFDEF TEST_INFLATE} + WriteLn('** Testing inflate with large buffers'); + test_large_inflate(compr, comprLen, uncompr, uncomprLen); + {$ENDIF} + + {$IFDEF TEST_FLUSH} + WriteLn('** Testing deflate with full flush'); + test_flush(compr, comprLen); + {$ENDIF} + {$IFDEF TEST_SYNC} + WriteLn('** Testing inflateSync'); + test_sync(compr, comprLen, uncompr, uncomprLen); + {$ENDIF} + comprLen := uncomprLen; + + {$IFDEF TEST_DICT} + WriteLn('** Testing deflate and inflate with preset dictionary'); + test_dict_deflate(compr, comprLen); + test_dict_inflate(compr, comprLen, uncompr, uncomprLen); + {$ENDIF} + + FreeMem(compr, comprLen); + FreeMem(uncompr, uncomprLen); +end. Added: external/zlib/contrib/pascal/readme.txt ============================================================================== --- (empty file) +++ external/zlib/contrib/pascal/readme.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,76 @@ + +This directory contains a Pascal (Delphi, Kylix) interface to the +zlib data compression library. 
+ + +Directory listing +================= + +zlibd32.mak makefile for Borland C++ +example.pas usage example of zlib +zlibpas.pas the Pascal interface to zlib +readme.txt this file + + +Compatibility notes +=================== + +- Although the name "zlib" would have been more normal for the + zlibpas unit, this name is already taken by Borland's ZLib unit. + This is somewhat unfortunate, because that unit is not a genuine + interface to the full-fledged zlib functionality, but a suite of + class wrappers around zlib streams. Other essential features, + such as checksums, are missing. + It would have been more appropriate for that unit to have a name + like "ZStreams", or something similar. + +- The C and zlib-supplied types int, uInt, long, uLong, etc. are + translated directly into Pascal types of similar sizes (Integer, + LongInt, etc.), to avoid namespace pollution. In particular, + there is no conversion of unsigned int into a Pascal unsigned + integer. The Word type is non-portable and has the same size + (16 bits) both in a 16-bit and in a 32-bit environment, unlike + Integer. Even if there is a 32-bit Cardinal type, there is no + real need for unsigned int in zlib under a 32-bit environment. + +- Except for the callbacks, the zlib function interfaces are + assuming the calling convention normally used in Pascal + (__pascal for DOS and Windows16, __fastcall for Windows32). + Since the cdecl keyword is used, the old Turbo Pascal does + not work with this interface. + +- The gz* function interfaces are not translated, to avoid + interfacing problems with the C runtime library. Besides, + gzprintf(gzFile file, const char *format, ...) + cannot be translated into Pascal. + + +Legal issues +============ + +The zlibpas interface is: + Copyright (C) 1995-2003 Jean-loup Gailly and Mark Adler. + Copyright (C) 1998 by Bob Dellaca. + Copyright (C) 2003 by Cosmin Truta. + +The example program is: + Copyright (C) 1995-2003 by Jean-loup Gailly.
+ Copyright (C) 1998,1999,2000 by Jacques Nomssi Nzali. + Copyright (C) 2003 by Cosmin Truta. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + Added: external/zlib/contrib/pascal/zlibd32.mak ============================================================================== --- (empty file) +++ external/zlib/contrib/pascal/zlibd32.mak Tue Jan 3 07:42:59 2006 @@ -0,0 +1,93 @@ +# Makefile for zlib +# For use with Delphi and C++ Builder under Win32 +# Updated for zlib 1.2.x by Cosmin Truta + +# ------------ Borland C++ ------------ + +# This project uses the Delphi (fastcall/register) calling convention: +LOC = -DZEXPORT=__fastcall -DZEXPORTVA=__cdecl + +CC = bcc32 +LD = bcc32 +AR = tlib +# do not use "-pr" in CFLAGS +CFLAGS = -a -d -k- -O2 $(LOC) +LDFLAGS = + + +# variables +ZLIB_LIB = zlib.lib + +OBJ1 = adler32.obj compress.obj crc32.obj deflate.obj gzio.obj infback.obj +OBJ2 = inffast.obj inflate.obj inftrees.obj trees.obj uncompr.obj zutil.obj +OBJP1 = +adler32.obj+compress.obj+crc32.obj+deflate.obj+gzio.obj+infback.obj +OBJP2 = +inffast.obj+inflate.obj+inftrees.obj+trees.obj+uncompr.obj+zutil.obj + + +# targets +all: $(ZLIB_LIB) example.exe minigzip.exe + +.c.obj: + $(CC) -c $(CFLAGS) $*.c + 
+adler32.obj: adler32.c zlib.h zconf.h + +compress.obj: compress.c zlib.h zconf.h + +crc32.obj: crc32.c zlib.h zconf.h crc32.h + +deflate.obj: deflate.c deflate.h zutil.h zlib.h zconf.h + +gzio.obj: gzio.c zutil.h zlib.h zconf.h + +infback.obj: infback.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inffast.obj: inffast.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h + +inflate.obj: inflate.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inftrees.obj: inftrees.c zutil.h zlib.h zconf.h inftrees.h + +trees.obj: trees.c zutil.h zlib.h zconf.h deflate.h trees.h + +uncompr.obj: uncompr.c zlib.h zconf.h + +zutil.obj: zutil.c zutil.h zlib.h zconf.h + +example.obj: example.c zlib.h zconf.h + +minigzip.obj: minigzip.c zlib.h zconf.h + + +# For the sake of the old Borland make, +# the command line is cut to fit in the MS-DOS 128 byte limit: +$(ZLIB_LIB): $(OBJ1) $(OBJ2) + -del $(ZLIB_LIB) + $(AR) $(ZLIB_LIB) $(OBJP1) + $(AR) $(ZLIB_LIB) $(OBJP2) + + +# testing +test: example.exe minigzip.exe + example + echo hello world | minigzip | minigzip -d + +example.exe: example.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) example.obj $(ZLIB_LIB) + +minigzip.exe: minigzip.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) minigzip.obj $(ZLIB_LIB) + + +# cleanup +clean: + -del *.obj + -del *.exe + -del *.lib + -del *.tds + -del zlib.bak + -del foo.gz + Added: external/zlib/contrib/pascal/zlibpas.pas ============================================================================== --- (empty file) +++ external/zlib/contrib/pascal/zlibpas.pas Tue Jan 3 07:42:59 2006 @@ -0,0 +1,236 @@ +(* zlibpas -- Pascal interface to the zlib data compression library + * + * Copyright (C) 2003 Cosmin Truta. + * Derived from original sources by Bob Dellaca. 
+ * For conditions of distribution and use, see copyright notice in readme.txt + *) + +unit zlibpas; + +interface + +const + ZLIB_VERSION = '1.2.3'; + +type + alloc_func = function(opaque: Pointer; items, size: Integer): Pointer; + cdecl; + free_func = procedure(opaque, address: Pointer); + cdecl; + + in_func = function(opaque: Pointer; var buf: PByte): Integer; + cdecl; + out_func = function(opaque: Pointer; buf: PByte; size: Integer): Integer; + cdecl; + + z_streamp = ^z_stream; + z_stream = packed record + next_in: PChar; (* next input byte *) + avail_in: Integer; (* number of bytes available at next_in *) + total_in: LongInt; (* total nb of input bytes read so far *) + + next_out: PChar; (* next output byte should be put there *) + avail_out: Integer; (* remaining free space at next_out *) + total_out: LongInt; (* total nb of bytes output so far *) + + msg: PChar; (* last error message, NULL if no error *) + state: Pointer; (* not visible by applications *) + + zalloc: alloc_func; (* used to allocate the internal state *) + zfree: free_func; (* used to free the internal state *) + opaque: Pointer; (* private data object passed to zalloc and zfree *) + + data_type: Integer; (* best guess about the data type: ascii or binary *) + adler: LongInt; (* adler32 value of the uncompressed data *) + reserved: LongInt; (* reserved for future use *) + end; + +(* constants *) +const + Z_NO_FLUSH = 0; + Z_PARTIAL_FLUSH = 1; + Z_SYNC_FLUSH = 2; + Z_FULL_FLUSH = 3; + Z_FINISH = 4; + + Z_OK = 0; + Z_STREAM_END = 1; + Z_NEED_DICT = 2; + Z_ERRNO = -1; + Z_STREAM_ERROR = -2; + Z_DATA_ERROR = -3; + Z_MEM_ERROR = -4; + Z_BUF_ERROR = -5; + Z_VERSION_ERROR = -6; + + Z_NO_COMPRESSION = 0; + Z_BEST_SPEED = 1; + Z_BEST_COMPRESSION = 9; + Z_DEFAULT_COMPRESSION = -1; + + Z_FILTERED = 1; + Z_HUFFMAN_ONLY = 2; + Z_RLE = 3; + Z_DEFAULT_STRATEGY = 0; + + Z_BINARY = 0; + Z_ASCII = 1; + Z_UNKNOWN = 2; + + Z_DEFLATED = 8; + +(* basic functions *) +function zlibVersion: PChar; +function 
deflateInit(var strm: z_stream; level: Integer): Integer; +function deflate(var strm: z_stream; flush: Integer): Integer; +function deflateEnd(var strm: z_stream): Integer; +function inflateInit(var strm: z_stream): Integer; +function inflate(var strm: z_stream; flush: Integer): Integer; +function inflateEnd(var strm: z_stream): Integer; + +(* advanced functions *) +function deflateInit2(var strm: z_stream; level, method, windowBits, + memLevel, strategy: Integer): Integer; +function deflateSetDictionary(var strm: z_stream; const dictionary: PChar; + dictLength: Integer): Integer; +function deflateCopy(var dest, source: z_stream): Integer; +function deflateReset(var strm: z_stream): Integer; +function deflateParams(var strm: z_stream; level, strategy: Integer): Integer; +function deflateBound(var strm: z_stream; sourceLen: LongInt): LongInt; +function deflatePrime(var strm: z_stream; bits, value: Integer): Integer; +function inflateInit2(var strm: z_stream; windowBits: Integer): Integer; +function inflateSetDictionary(var strm: z_stream; const dictionary: PChar; + dictLength: Integer): Integer; +function inflateSync(var strm: z_stream): Integer; +function inflateCopy(var dest, source: z_stream): Integer; +function inflateReset(var strm: z_stream): Integer; +function inflateBackInit(var strm: z_stream; + windowBits: Integer; window: PChar): Integer; +function inflateBack(var strm: z_stream; in_fn: in_func; in_desc: Pointer; + out_fn: out_func; out_desc: Pointer): Integer; +function inflateBackEnd(var strm: z_stream): Integer; +function zlibCompileFlags: LongInt; + +(* utility functions *) +function compress(dest: PChar; var destLen: LongInt; + const source: PChar; sourceLen: LongInt): Integer; +function compress2(dest: PChar; var destLen: LongInt; + const source: PChar; sourceLen: LongInt; + level: Integer): Integer; +function compressBound(sourceLen: LongInt): LongInt; +function uncompress(dest: PChar; var destLen: LongInt; + const source: PChar; sourceLen: 
LongInt): Integer; + +(* checksum functions *) +function adler32(adler: LongInt; const buf: PChar; len: Integer): LongInt; +function crc32(crc: LongInt; const buf: PChar; len: Integer): LongInt; + +(* various hacks, don't look :) *) +function deflateInit_(var strm: z_stream; level: Integer; + const version: PChar; stream_size: Integer): Integer; +function inflateInit_(var strm: z_stream; const version: PChar; + stream_size: Integer): Integer; +function deflateInit2_(var strm: z_stream; + level, method, windowBits, memLevel, strategy: Integer; + const version: PChar; stream_size: Integer): Integer; +function inflateInit2_(var strm: z_stream; windowBits: Integer; + const version: PChar; stream_size: Integer): Integer; +function inflateBackInit_(var strm: z_stream; + windowBits: Integer; window: PChar; + const version: PChar; stream_size: Integer): Integer; + + +implementation + +{$L adler32.obj} +{$L compress.obj} +{$L crc32.obj} +{$L deflate.obj} +{$L infback.obj} +{$L inffast.obj} +{$L inflate.obj} +{$L inftrees.obj} +{$L trees.obj} +{$L uncompr.obj} +{$L zutil.obj} + +function adler32; external; +function compress; external; +function compress2; external; +function compressBound; external; +function crc32; external; +function deflate; external; +function deflateBound; external; +function deflateCopy; external; +function deflateEnd; external; +function deflateInit_; external; +function deflateInit2_; external; +function deflateParams; external; +function deflatePrime; external; +function deflateReset; external; +function deflateSetDictionary; external; +function inflate; external; +function inflateBack; external; +function inflateBackEnd; external; +function inflateBackInit_; external; +function inflateCopy; external; +function inflateEnd; external; +function inflateInit_; external; +function inflateInit2_; external; +function inflateReset; external; +function inflateSetDictionary; external; +function inflateSync; external; +function uncompress; external; +function 
zlibCompileFlags; external; +function zlibVersion; external; + +function deflateInit(var strm: z_stream; level: Integer): Integer; +begin + Result := deflateInit_(strm, level, ZLIB_VERSION, sizeof(z_stream)); +end; + +function deflateInit2(var strm: z_stream; level, method, windowBits, memLevel, + strategy: Integer): Integer; +begin + Result := deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + ZLIB_VERSION, sizeof(z_stream)); +end; + +function inflateInit(var strm: z_stream): Integer; +begin + Result := inflateInit_(strm, ZLIB_VERSION, sizeof(z_stream)); +end; + +function inflateInit2(var strm: z_stream; windowBits: Integer): Integer; +begin + Result := inflateInit2_(strm, windowBits, ZLIB_VERSION, sizeof(z_stream)); +end; + +function inflateBackInit(var strm: z_stream; + windowBits: Integer; window: PChar): Integer; +begin + Result := inflateBackInit_(strm, windowBits, window, + ZLIB_VERSION, sizeof(z_stream)); +end; + +function _malloc(Size: Integer): Pointer; cdecl; +begin + GetMem(Result, Size); +end; + +procedure _free(Block: Pointer); cdecl; +begin + FreeMem(Block); +end; + +procedure _memset(P: Pointer; B: Byte; count: Integer); cdecl; +begin + FillChar(P^, count, B); +end; + +procedure _memcpy(dest, source: Pointer; count: Integer); cdecl; +begin + Move(source^, dest^, count); +end; + +end. 
Added: external/zlib/contrib/puff/Makefile ============================================================================== --- (empty file) +++ external/zlib/contrib/puff/Makefile Tue Jan 3 07:42:59 2006 @@ -0,0 +1,8 @@ +puff: puff.c puff.h + cc -DTEST -o puff puff.c + +test: puff + puff zeros.raw + +clean: + rm -f puff puff.o Added: external/zlib/contrib/puff/README ============================================================================== --- (empty file) +++ external/zlib/contrib/puff/README Tue Jan 3 07:42:59 2006 @@ -0,0 +1,63 @@ +Puff -- A Simple Inflate +3 Mar 2003 +Mark Adler +madler at alumni.caltech.edu + +What this is -- + +puff.c provides the routine puff() to decompress the deflate data format. It +does so more slowly than zlib, but the code is about one-fifth the size of the +inflate code in zlib, and written to be very easy to read. + +Why I wrote this -- + +puff.c was written to document the deflate format unambiguously, by virtue of +being working C code. It is meant to supplement RFC 1951, which formally +describes the deflate format. I have received many questions on details of the +deflate format, and I hope that reading this code will answer those questions. +puff.c is heavily commented with details of the deflate format, especially +those little nooks and crannies of the format that might not be obvious from a +specification. + +puff.c may also be useful in applications where code size or memory usage is a +very limited resource, and speed is not as important. + +How to use it -- + +Well, most likely you should just be reading puff.c and using zlib for actual +applications, but if you must ...
+ +Include puff.h in your code, which provides this prototype: + +int puff(unsigned char *dest, /* pointer to destination pointer */ + unsigned long *destlen, /* amount of output space */ + unsigned char *source, /* pointer to source data pointer */ + unsigned long *sourcelen); /* amount of input available */ + +Then you can call puff() to decompress a deflate stream that is in memory in +its entirety at source, to a sufficiently sized block of memory for the +decompressed data at dest. puff() is the only external symbol in puff.c. The +only C library functions that puff.c needs are setjmp() and longjmp(), which +are used to simplify error checking in the code to improve readability. puff.c +does no memory allocation, and uses less than 2K bytes off of the stack. + +If destlen is not enough space for the uncompressed data, then inflate will +return an error without writing more than destlen bytes. Note that this means +that in order to decompress the deflate data successfully, you need to know +the size of the uncompressed data ahead of time. + +If needed, puff() can determine the size of the uncompressed data with no +output space. This is done by passing dest equal to (unsigned char *)0. Then +the initial value of *destlen is ignored and *destlen is set to the length of +the uncompressed data. So if the size of the uncompressed data is not known, +then two passes of puff() can be used--first to determine the size, and second +to do the actual inflation after allocating the appropriate memory. Not +pretty, but it works. (This is one of the reasons you should be using zlib.) + +The deflate format is self-terminating. If the deflate stream does not end +in *sourcelen bytes, puff() will return an error without reading at or past +endsource. + +On return, *sourcelen is updated to the amount of input data consumed, and +*destlen is updated to the size of the uncompressed data. See the comments +in puff.c for the possible return codes for puff().
Added: external/zlib/contrib/puff/puff.c ============================================================================== --- (empty file) +++ external/zlib/contrib/puff/puff.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,837 @@ +/* + * puff.c + * Copyright (C) 2002-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in puff.h + * version 1.8, 9 Jan 2004 + * + * puff.c is a simple inflate written to be an unambiguous way to specify the + * deflate format. It is not written for speed but rather simplicity. As a + * side benefit, this code might actually be useful when small code is more + * important than speed, such as bootstrap applications. For typical deflate + * data, zlib's inflate() is about four times as fast as puff(). zlib's + * inflate compiles to around 20K on my machine, whereas puff.c compiles to + * around 4K on my machine (a PowerPC using GNU cc). If the faster decode() + * function here is used, then puff() is only twice as slow as zlib's + * inflate(). + * + * All dynamically allocated memory comes from the stack. The stack required + * is less than 2K bytes. This code is compatible with 16-bit int's and + * assumes that long's are at least 32 bits. puff.c uses the short data type, + * assumed to be 16 bits, for arrays in order to conserve memory. The code + * works whether integers are stored big endian or little endian. + * + * In the comments below are "Format notes" that describe the inflate process + * and document some of the less obvious aspects of the format.
This source + * code is meant to supplement RFC 1951, which formally describes the deflate + * format: + * + * http://www.zlib.org/rfc-deflate.html + */ + +/* + * Change history: + * + * 1.0 10 Feb 2002 - First version + * 1.1 17 Feb 2002 - Clarifications of some comments and notes + * - Update puff() dest and source pointers on negative + * errors to facilitate debugging deflators + * - Remove longest from struct huffman -- not needed + * - Simplify offs[] index in construct() + * - Add input size and checking, using longjmp() to + * maintain easy readability + * - Use short data type for large arrays + * - Use pointers instead of long to specify source and + * destination sizes to avoid arbitrary 4 GB limits + * 1.2 17 Mar 2002 - Add faster version of decode(), doubles speed (!), + * but leave simple version for readability + * - Make sure invalid distances detected if pointers + * are 16 bits + * - Fix fixed codes table error + * - Provide a scanning mode for determining size of + * uncompressed data + * 1.3 20 Mar 2002 - Go back to lengths for puff() parameters [Jean-loup] + * - Add a puff.h file for the interface + * - Add braces in puff() for else do [Jean-loup] + * - Use indexes instead of pointers for readability + * 1.4 31 Mar 2002 - Simplify construct() code set check + * - Fix some comments + * - Add FIXLCODES #define + * 1.5 6 Apr 2002 - Minor comment fixes + * 1.6 7 Aug 2002 - Minor format changes + * 1.7 3 Mar 2003 - Added test code for distribution + * - Added zlib-like license + * 1.8 9 Jan 2004 - Added some comments on no distance codes case + */ + +#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */ +#include "puff.h" /* prototype for puff() */ + +#define local static /* for local function definitions */ +#define NIL ((unsigned char *)0) /* for no output option */ + +/* + * Maximums for allocations and loops. It is not useful to change these -- + * they are fixed by the deflate format.
+ */ +#define MAXBITS 15 /* maximum bits in a code */ +#define MAXLCODES 286 /* maximum number of literal/length codes */ +#define MAXDCODES 30 /* maximum number of distance codes */ +#define MAXCODES (MAXLCODES+MAXDCODES) /* maximum codes lengths to read */ +#define FIXLCODES 288 /* number of fixed literal/length codes */ + +/* input and output state */ +struct state { + /* output state */ + unsigned char *out; /* output buffer */ + unsigned long outlen; /* available space at out */ + unsigned long outcnt; /* bytes written to out so far */ + + /* input state */ + unsigned char *in; /* input buffer */ + unsigned long inlen; /* available input at in */ + unsigned long incnt; /* bytes read so far */ + int bitbuf; /* bit buffer */ + int bitcnt; /* number of bits in bit buffer */ + + /* input limit error return state for bits() and decode() */ + jmp_buf env; +}; + +/* + * Return need bits from the input stream. This always leaves less than + * eight bits in the buffer. bits() works properly for need == 0. + * + * Format notes: + * + * - Bits are stored in bytes from the least significant bit to the most + * significant bit. Therefore bits are dropped from the bottom of the bit + * buffer, using shift right, and new bytes are appended to the top of the + * bit buffer, using shift left. + */ +local int bits(struct state *s, int need) +{ + long val; /* bit accumulator (can use up to 20 bits) */ + + /* load at least need bits into val */ + val = s->bitbuf; + while (s->bitcnt < need) { + if (s->incnt == s->inlen) longjmp(s->env, 1); /* out of input */ + val |= (long)(s->in[s->incnt++]) << s->bitcnt; /* load eight bits */ + s->bitcnt += 8; + } + + /* drop need bits and update buffer, always zero to seven bits left */ + s->bitbuf = (int)(val >> need); + s->bitcnt -= need; + + /* return need bits, zeroing the bits above that */ + return (int)(val & ((1L << need) - 1)); +} + +/* + * Process a stored block. 
+ * + * Format notes: + * + * - After the two-bit stored block type (00), the stored block length and + * stored bytes are byte-aligned for fast copying. Therefore any leftover + * bits in the byte that has the last bit of the type, as many as seven, are + * discarded. The value of the discarded bits are not defined and should not + * be checked against any expectation. + * + * - The second inverted copy of the stored block length does not have to be + * checked, but it's probably a good idea to do so anyway. + * + * - A stored block can have zero length. This is sometimes used to byte-align + * subsets of the compressed data for random access or partial recovery. + */ +local int stored(struct state *s) +{ + unsigned len; /* length of stored block */ + + /* discard leftover bits from current byte (assumes s->bitcnt < 8) */ + s->bitbuf = 0; + s->bitcnt = 0; + + /* get length and check against its one's complement */ + if (s->incnt + 4 > s->inlen) return 2; /* not enough input */ + len = s->in[s->incnt++]; + len |= s->in[s->incnt++] << 8; + if (s->in[s->incnt++] != (~len & 0xff) || + s->in[s->incnt++] != ((~len >> 8) & 0xff)) + return -2; /* didn't match complement! */ + + /* copy len bytes from in to out */ + if (s->incnt + len > s->inlen) return 2; /* not enough input */ + if (s->out != NIL) { + if (s->outcnt + len > s->outlen) + return 1; /* not enough output space */ + while (len--) + s->out[s->outcnt++] = s->in[s->incnt++]; + } + else { /* just scanning */ + s->outcnt += len; + s->incnt += len; + } + + /* done with a valid stored block */ + return 0; +} + +/* + * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of + * each length, which for a canonical code are stepped through in order. + * symbol[] are the symbol values in canonical order, where the number of + * entries is the sum of the counts in count[]. The decoding process can be + * seen in the function decode() below. 
+ */ +struct huffman { + short *count; /* number of symbols of each length */ + short *symbol; /* canonically ordered symbols */ +}; + +/* + * Decode a code from the stream s using huffman table h. Return the symbol or + * a negative value if there is an error. If all of the lengths are zero, i.e. + * an empty code, or if the code is incomplete and an invalid code is received, + * then -9 is returned after reading MAXBITS bits. + * + * Format notes: + * + * - The codes as stored in the compressed data are bit-reversed relative to + * a simple integer ordering of codes of the same lengths. Hence below the + * bits are pulled from the compressed data one at a time and used to + * build the code value reversed from what is in the stream in order to + * permit simple integer comparisons for decoding. A table-based decoding + * scheme (as used in zlib) does not need to do this reversal. + * + * - The first code for the shortest length is all zeros. Subsequent codes of + * the same length are simply integer increments of the previous code. When + * moving up a length, a zero bit is appended to the code. For a complete + * code, the last code of the longest length will be all ones. + * + * - Incomplete codes are handled by this decoder, since they are permitted + * in the deflate format. See the format notes for fixed() and dynamic(). 
+ */ +#ifdef SLOW +local int decode(struct state *s, struct huffman *h) +{ + int len; /* current number of bits in code */ + int code; /* len bits being decoded */ + int first; /* first code of length len */ + int count; /* number of codes of length len */ + int index; /* index of first code of length len in symbol table */ + + code = first = index = 0; + for (len = 1; len <= MAXBITS; len++) { + code |= bits(s, 1); /* get next bit */ + count = h->count[len]; + if (code < first + count) /* if length len, return symbol */ + return h->symbol[index + (code - first)]; + index += count; /* else update for next length */ + first += count; + first <<= 1; + code <<= 1; + } + return -9; /* ran out of codes */ +} + +/* + * A faster version of decode() for real applications of this code. It's not + * as readable, but it makes puff() twice as fast. And it only makes the code + * a few percent larger. + */ +#else /* !SLOW */ +local int decode(struct state *s, struct huffman *h) +{ + int len; /* current number of bits in code */ + int code; /* len bits being decoded */ + int first; /* first code of length len */ + int count; /* number of codes of length len */ + int index; /* index of first code of length len in symbol table */ + int bitbuf; /* bits from stream */ + int left; /* bits left in next or left to process */ + short *next; /* next number of codes */ + + bitbuf = s->bitbuf; + left = s->bitcnt; + code = first = index = 0; + len = 1; + next = h->count + 1; + while (1) { + while (left--) { + code |= bitbuf & 1; + bitbuf >>= 1; + count = *next++; + if (code < first + count) { /* if length len, return symbol */ + s->bitbuf = bitbuf; + s->bitcnt = (s->bitcnt - len) & 7; + return h->symbol[index + (code - first)]; + } + index += count; /* else update for next length */ + first += count; + first <<= 1; + code <<= 1; + len++; + } + left = (MAXBITS+1) - len; + if (left == 0) break; + if (s->incnt == s->inlen) longjmp(s->env, 1); /* out of input */ + bitbuf = s->in[s->incnt++]; + 
if (left > 8) left = 8; + } + return -9; /* ran out of codes */ +} +#endif /* SLOW */ + +/* + * Given the list of code lengths length[0..n-1] representing a canonical + * Huffman code for n symbols, construct the tables required to decode those + * codes. Those tables are the number of codes of each length, and the symbols + * sorted by length, retaining their original order within each length. The + * return value is zero for a complete code set, negative for an over- + * subscribed code set, and positive for an incomplete code set. The tables + * can be used if the return value is zero or positive, but they cannot be used + * if the return value is negative. If the return value is zero, it is not + * possible for decode() using that table to return an error--any stream of + * enough bits will resolve to a symbol. If the return value is positive, then + * it is possible for decode() using that table to return an error for received + * codes past the end of the incomplete lengths. + * + * Not used by decode(), but used for error checking, h->count[0] is the number + * of the n symbols not in the code. So n - h->count[0] is the number of + * codes. This is useful for checking for incomplete codes that have more than + * one symbol, which is an error in a dynamic block. + * + * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS + * This is assured by the construction of the length arrays in dynamic() and + * fixed() and is not verified by construct(). + * + * Format notes: + * + * - Permitted and expected examples of incomplete codes are one of the fixed + * codes and any code with a single symbol which in deflate is coded as one + * bit instead of zero bits. See the format notes for fixed() and dynamic(). + * + * - Within a given code length, the symbols are kept in ascending order for + * the code bits definition. 
+ */ +local int construct(struct huffman *h, short *length, int n) +{ + int symbol; /* current symbol when stepping through length[] */ + int len; /* current length when stepping through h->count[] */ + int left; /* number of possible codes left of current length */ + short offs[MAXBITS+1]; /* offsets in symbol table for each length */ + + /* count number of codes of each length */ + for (len = 0; len <= MAXBITS; len++) + h->count[len] = 0; + for (symbol = 0; symbol < n; symbol++) + (h->count[length[symbol]])++; /* assumes lengths are within bounds */ + if (h->count[0] == n) /* no codes! */ + return 0; /* complete, but decode() will fail */ + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; /* one possible code of zero length */ + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; /* one more bit, double codes left */ + left -= h->count[len]; /* deduct count from possible codes */ + if (left < 0) return left; /* over-subscribed--return negative */ + } /* left > 0 means incomplete */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + h->count[len]; + + /* + * put symbols in table sorted by length, by symbol order within each + * length + */ + for (symbol = 0; symbol < n; symbol++) + if (length[symbol] != 0) + h->symbol[offs[length[symbol]]++] = symbol; + + /* return zero for complete set, positive for incomplete set */ + return left; +} + +/* + * Decode literal/length and distance codes until an end-of-block code. + * + * Format notes: + * + * - Compressed data that is after the block type if fixed or after the code + * description if dynamic is a combination of literals and length/distance + * pairs terminated by and end-of-block code. Literals are simply Huffman + * coded bytes. 
A length/distance pair is a coded length followed by a + * coded distance to represent a string that occurs earlier in the + * uncompressed data that occurs again at the current location. + * + * - Literals, lengths, and the end-of-block code are combined into a single + * code of up to 286 symbols. They are 256 literals (0..255), 29 length + * symbols (257..285), and the end-of-block symbol (256). + * + * - There are 256 possible lengths (3..258), and so 29 symbols are not enough + * to represent all of those. Lengths 3..10 and 258 are in fact represented + * by just a length symbol. Lengths 11..257 are represented as a symbol and + * some number of extra bits that are added as an integer to the base length + * of the length symbol. The number of extra bits is determined by the base + * length symbol. These are in the static arrays below, lens[] for the base + * lengths and lext[] for the corresponding number of extra bits. + * + * - The reason that 258 gets its own symbol is that the longest length is used + * often in highly redundant files. Note that 258 can also be coded as the + * base value 227 plus the maximum extra value of 31. While a good deflate + * should never do this, it is not an error, and should be decoded properly. + * + * - If a length is decoded, including its extra bits if any, then it is + * followed by a distance code. There are up to 30 distance symbols. Again + * there are many more possible distances (1..32768), so extra bits are added + * to a base value represented by the symbol. The distances 1..4 get their + * own symbol, but the rest require extra bits. The base distances and + * corresponding number of extra bits are below in the static arrays dists[] + * and dext[]. + * + * - Literal bytes are simply written to the output. A length/distance pair is + * an instruction to copy previously uncompressed bytes to the output. The + * copy is from distance bytes back in the output stream, copying for length + * bytes.
+ * + * - Distances pointing before the beginning of the output data are not + * permitted. + * + * - Overlapped copies, where the length is greater than the distance, are + * allowed and common. For example, a distance of one and a length of 258 + * simply copies the last byte 258 times. A distance of four and a length of + * twelve copies the last four bytes three times. A simple forward copy + * ignoring whether the length is greater than the distance or not implements + * this correctly. You should not use memcpy() since its behavior is not + * defined for overlapped arrays. You should not use memmove() or bcopy() + * since though their behavior -is- defined for overlapping arrays, it is + * defined to do the wrong thing in this case. + */ +local int codes(struct state *s, + struct huffman *lencode, + struct huffman *distcode) +{ + int symbol; /* decoded symbol */ + int len; /* length for copy */ + unsigned dist; /* distance for copy */ + static const short lens[29] = { /* Size base for length codes 257..285 */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258}; + static const short lext[29] = { /* Extra bits for length codes 257..285 */ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0}; + static const short dists[30] = { /* Offset base for distance codes 0..29 */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577}; + static const short dext[30] = { /* Extra bits for distance codes 0..29 */ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13}; + + /* decode literals and length/distance pairs */ + do { + symbol = decode(s, lencode); + if (symbol < 0) return symbol; /* invalid symbol */ + if (symbol < 256) { /* literal: symbol is the byte */ + /* write out the literal */ + if (s->out != NIL) { + if (s->outcnt == 
s->outlen) return 1; + s->out[s->outcnt] = symbol; + } + s->outcnt++; + } + else if (symbol > 256) { /* length */ + /* get and compute length */ + symbol -= 257; + if (symbol >= 29) return -9; /* invalid fixed code */ + len = lens[symbol] + bits(s, lext[symbol]); + + /* get and check distance */ + symbol = decode(s, distcode); + if (symbol < 0) return symbol; /* invalid symbol */ + dist = dists[symbol] + bits(s, dext[symbol]); + if (dist > s->outcnt) + return -10; /* distance too far back */ + + /* copy length bytes from distance bytes back */ + if (s->out != NIL) { + if (s->outcnt + len > s->outlen) return 1; + while (len--) { + s->out[s->outcnt] = s->out[s->outcnt - dist]; + s->outcnt++; + } + } + else + s->outcnt += len; + } + } while (symbol != 256); /* end of block symbol */ + + /* done with a valid fixed or dynamic block */ + return 0; +} + +/* + * Process a fixed codes block. + * + * Format notes: + * + * - This block type can be useful for compressing small amounts of data for + * which the size of the code descriptions in a dynamic block exceeds the + * benefit of custom codes for that block. For fixed codes, no bits are + * spent on code descriptions. Instead the code lengths for literal/length + * codes and distance codes are fixed. The specific lengths for each symbol + * can be seen in the "for" loops below. + * + * - The literal/length code is complete, but has two symbols that are invalid + * and should result in an error if received. This cannot be implemented + * simply as an incomplete code since those two symbols are in the "middle" + * of the code. They are eight bits long and the longest literal/length\ + * code is nine bits. Therefore the code must be constructed with those + * symbols, and the invalid symbols must be detected after decoding. + * + * - The fixed distance codes also have two invalid symbols that should result + * in an error if received. 
Since all of the distance codes are the same + * length, this can be implemented as an incomplete code. Then the invalid + * codes are detected while decoding. + */ +local int fixed(struct state *s) +{ + static int virgin = 1; + static short lencnt[MAXBITS+1], lensym[FIXLCODES]; + static short distcnt[MAXBITS+1], distsym[MAXDCODES]; + static struct huffman lencode = {lencnt, lensym}; + static struct huffman distcode = {distcnt, distsym}; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + int symbol; + short lengths[FIXLCODES]; + + /* literal/length table */ + for (symbol = 0; symbol < 144; symbol++) + lengths[symbol] = 8; + for (; symbol < 256; symbol++) + lengths[symbol] = 9; + for (; symbol < 280; symbol++) + lengths[symbol] = 7; + for (; symbol < FIXLCODES; symbol++) + lengths[symbol] = 8; + construct(&lencode, lengths, FIXLCODES); + + /* distance table */ + for (symbol = 0; symbol < MAXDCODES; symbol++) + lengths[symbol] = 5; + construct(&distcode, lengths, MAXDCODES); + + /* do this just once */ + virgin = 0; + } + + /* decode data until end-of-block code */ + return codes(s, &lencode, &distcode); +} + +/* + * Process a dynamic codes block. + * + * Format notes: + * + * - A dynamic block starts with a description of the literal/length and + * distance codes for that block. New dynamic blocks allow the compressor to + * rapidly adapt to changing data with new codes optimized for that data. + * + * - The codes used by the deflate format are "canonical", which means that + * the actual bits of the codes are generated in an unambiguous way simply + * from the number of bits in each code. Therefore the code descriptions + * are simply a list of code lengths for each symbol. + * + * - The code lengths are stored in order for the symbols, so lengths are + * provided for each of the literal/length symbols, and for each of the + * distance symbols. 
+ * + * - If a symbol is not used in the block, this is represented by a zero as + * the code length. This does not mean a zero-length code, but rather + * that no code should be created for this symbol. There is no way in the + * deflate format to represent a zero-length code. + * + * - The maximum number of bits in a code is 15, so the possible lengths for + * any code are 1..15. + * + * - The fact that a length of zero is not permitted for a code has an + * interesting consequence. Normally if only one symbol is used for a given + * code, then in fact that code could be represented with zero bits. However + * in deflate, that code has to be at least one bit. So for example, if + * only a single distance base symbol appears in a block, then it will be + * represented by a single code of length one, in particular one 0 bit. This + * is an incomplete code, since if a 1 bit is received, it has no meaning, + * and should result in an error. So incomplete distance codes of one symbol + * should be permitted, and the receipt of invalid codes should be handled. + * + * - It is also possible to have a single literal/length code, but that code + * must be the end-of-block code, since every dynamic block has one. This + * is not the most efficient way to create an empty block (an empty fixed + * block is fewer bits), but it is allowed by the format. So incomplete + * literal/length codes of one symbol should also be permitted. + * + * - If there are only literal codes and no lengths, then there are no distance + * codes. This is represented by one distance code with zero bits. + * + * - The list of up to 286 length/literal lengths and up to 30 distance lengths + * are themselves compressed using Huffman codes and run-length encoding. In + * the list of code lengths, a 0 symbol means no code, a 1..15 symbol means + * that length, and the symbols 16, 17, and 18 are run-length instructions.
+ * Each of 16, 17, and 18 is followed by extra bits to define the length of + * the run. 16 copies the last length 3 to 6 times. 17 represents 3 to 10 + * zero lengths, and 18 represents 11 to 138 zero lengths. Unused symbols + * are common, hence the special coding for zero lengths. + * + * - The symbols for 0..18 are Huffman coded, and so that code must be + * described first. This is simply a sequence of up to 19 three-bit values + * representing no code (0) or the code length for that symbol (1..7). + * + * - A dynamic block starts with three fixed-size counts from which is computed + * the number of literal/length code lengths, the number of distance code + * lengths, and the number of code length code lengths (ok, you come up with + * a better name!) in the code descriptions. For the literal/length and + * distance codes, lengths after those provided are considered zero, i.e. no + * code. The code length code lengths are received in a permuted order (see + * the order[] array below) to make a short code length code length list more + * likely. As it turns out, very short and very long codes are less likely + * to be seen in a dynamic code description, hence what may appear initially + * to be a peculiar ordering. + * + * - Given the number of literal/length code lengths (nlen) and distance code + * lengths (ndist), then they are treated as one long list of nlen + ndist + * code lengths. Therefore run-length coding can and often does cross the + * boundary between the two sets of lengths. + * + * - So to summarize, the code description at the start of a dynamic block is + * three counts for the number of code lengths for the literal/length codes, + * the distance codes, and the code length codes. This is followed by the + * code length code lengths, three bits each. This is used to construct the + * code length code which is used to read the remainder of the lengths.
Then + * the literal/length code lengths and distance lengths are read as a single + * set of lengths using the code length codes. Codes are constructed from + * the resulting two sets of lengths, and then finally you can start + * decoding actual compressed data in the block. + * + * - For reference, a "typical" size for the code description in a dynamic + * block is around 80 bytes. + */ +local int dynamic(struct state *s) +{ + int nlen, ndist, ncode; /* number of lengths in descriptor */ + int index; /* index of lengths[] */ + int err; /* construct() return value */ + short lengths[MAXCODES]; /* descriptor code lengths */ + short lencnt[MAXBITS+1], lensym[MAXLCODES]; /* lencode memory */ + short distcnt[MAXBITS+1], distsym[MAXDCODES]; /* distcode memory */ + struct huffman lencode = {lencnt, lensym}; /* length code */ + struct huffman distcode = {distcnt, distsym}; /* distance code */ + static const short order[19] = /* permutation of code length codes */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* get number of lengths in each table, check lengths */ + nlen = bits(s, 5) + 257; + ndist = bits(s, 5) + 1; + ncode = bits(s, 4) + 4; + if (nlen > MAXLCODES || ndist > MAXDCODES) + return -3; /* bad counts */ + + /* read code length code lengths (really), missing lengths are zero */ + for (index = 0; index < ncode; index++) + lengths[order[index]] = bits(s, 3); + for (; index < 19; index++) + lengths[order[index]] = 0; + + /* build huffman table for code lengths codes (use lencode temporarily) */ + err = construct(&lencode, lengths, 19); + if (err != 0) return -4; /* require complete code set here */ + + /* read length/literal and distance code length tables */ + index = 0; + while (index < nlen + ndist) { + int symbol; /* decoded value */ + int len; /* last length to repeat */ + + symbol = decode(s, &lencode); + if (symbol < 16) /* length in 0..15 */ + lengths[index++] = symbol; + else { /* repeat instruction */ + len = 0; /* assume 
repeating zeros */ + if (symbol == 16) { /* repeat last length 3..6 times */ + if (index == 0) return -5; /* no last length! */ + len = lengths[index - 1]; /* last length */ + symbol = 3 + bits(s, 2); + } + else if (symbol == 17) /* repeat zero 3..10 times */ + symbol = 3 + bits(s, 3); + else /* == 18, repeat zero 11..138 times */ + symbol = 11 + bits(s, 7); + if (index + symbol > nlen + ndist) + return -6; /* too many lengths! */ + while (symbol--) /* repeat last or zero symbol times */ + lengths[index++] = len; + } + } + + /* build huffman table for literal/length codes */ + err = construct(&lencode, lengths, nlen); + if (err < 0 || (err > 0 && nlen - lencode.count[0] != 1)) + return -7; /* only allow incomplete codes if just one code */ + + /* build huffman table for distance codes */ + err = construct(&distcode, lengths + nlen, ndist); + if (err < 0 || (err > 0 && ndist - distcode.count[0] != 1)) + return -8; /* only allow incomplete codes if just one code */ + + /* decode data until end-of-block code */ + return codes(s, &lencode, &distcode); +} + +/* + * Inflate source to dest. On return, destlen and sourcelen are updated to the + * size of the uncompressed data and the size of the deflate data respectively. + * On success, the return value of puff() is zero. If there is an error in the + * source data, i.e. it is not in the deflate format, then a negative value is + * returned. If there is not enough input available or there is not enough + * output space, then a positive error is returned. In that case, destlen and + * sourcelen are not updated to facilitate retrying from the beginning with the + * provision of more input data or more output space. In the case of invalid + * inflate data (a negative error), the dest and source pointers are updated to + * facilitate the debugging of deflators. + * + * puff() also has a mode to determine the size of the uncompressed output with + * no output written. For this dest must be (unsigned char *)0. 
In this case, + * the input value of *destlen is ignored, and on return *destlen is set to the + * size of the uncompressed output. + * + * The return codes are: + * + * 2: available inflate data did not terminate + * 1: output space exhausted before completing inflate + * 0: successful inflate + * -1: invalid block type (type == 3) + * -2: stored block length did not match one's complement + * -3: dynamic block code description: too many length or distance codes + * -4: dynamic block code description: code lengths codes incomplete + * -5: dynamic block code description: repeat lengths with no first length + * -6: dynamic block code description: repeat more than specified lengths + * -7: dynamic block code description: invalid literal/length code lengths + * -8: dynamic block code description: invalid distance code lengths + * -9: invalid literal/length or distance code in fixed or dynamic block + * -10: distance is too far back in fixed or dynamic block + * + * Format notes: + * + * - Three bits are read for each block to determine the kind of block and + * whether or not it is the last block. Then the block is decoded and the + * process repeated if it was not the last block. + * + * - The leftover bits in the last byte of the deflate data after the last + * block (if it was a fixed or dynamic block) are undefined and have no + * expected values to check. 
+ */ +int puff(unsigned char *dest, /* pointer to destination pointer */ + unsigned long *destlen, /* amount of output space */ + unsigned char *source, /* pointer to source data pointer */ + unsigned long *sourcelen) /* amount of input available */ +{ + struct state s; /* input/output state */ + int last, type; /* block information */ + int err; /* return value */ + + /* initialize output state */ + s.out = dest; + s.outlen = *destlen; /* ignored if dest is NIL */ + s.outcnt = 0; + + /* initialize input state */ + s.in = source; + s.inlen = *sourcelen; + s.incnt = 0; + s.bitbuf = 0; + s.bitcnt = 0; + + /* return if bits() or decode() tries to read past available input */ + if (setjmp(s.env) != 0) /* if came back here via longjmp() */ + err = 2; /* then skip do-loop, return error */ + else { + /* process blocks until last block or error */ + do { + last = bits(&s, 1); /* one if last block */ + type = bits(&s, 2); /* block type 0..3 */ + err = type == 0 ? stored(&s) : + (type == 1 ? fixed(&s) : + (type == 2 ? 
dynamic(&s) : + -1)); /* type == 3, invalid */ + if (err != 0) break; /* return with error */ + } while (!last); + } + + /* update the lengths and return */ + if (err <= 0) { + *destlen = s.outcnt; + *sourcelen = s.incnt; + } + return err; +} + +#ifdef TEST +/* Example of how to use puff() */ +#include +#include +#include +#include + +local unsigned char *yank(char *name, unsigned long *len) +{ + unsigned long size; + unsigned char *buf; + FILE *in; + struct stat s; + + *len = 0; + if (stat(name, &s)) return NULL; + if ((s.st_mode & S_IFMT) != S_IFREG) return NULL; + size = (unsigned long)(s.st_size); + if (size == 0 || (off_t)size != s.st_size) return NULL; + in = fopen(name, "r"); + if (in == NULL) return NULL; + buf = malloc(size); + if (buf != NULL && fread(buf, 1, size, in) != size) { + free(buf); + buf = NULL; + } + fclose(in); + *len = size; + return buf; +} + +int main(int argc, char **argv) +{ + int ret; + unsigned char *source; + unsigned long len, sourcelen, destlen; + + if (argc < 2) return 2; + source = yank(argv[1], &len); + if (source == NULL) return 2; + sourcelen = len; + ret = puff(NIL, &destlen, source, &sourcelen); + if (ret) + printf("puff() failed with return code %d\n", ret); + else { + printf("puff() succeeded uncompressing %lu bytes\n", destlen); + if (sourcelen < len) printf("%lu compressed bytes unused\n", + len - sourcelen); + } + free(source); + return ret; +} +#endif Added: external/zlib/contrib/puff/puff.h ============================================================================== --- (empty file) +++ external/zlib/contrib/puff/puff.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,31 @@ +/* puff.h + Copyright (C) 2002, 2003 Mark Adler, all rights reserved + version 1.7, 3 Mar 2002 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. 
+ + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler at alumni.caltech.edu + */ + + +/* + * See puff.c for purpose and usage. + */ +int puff(unsigned char *dest, /* pointer to destination pointer */ + unsigned long *destlen, /* amount of output space */ + unsigned char *source, /* pointer to source data pointer */ + unsigned long *sourcelen); /* amount of input available */ Added: external/zlib/contrib/puff/zeros.raw ============================================================================== Binary file. No diff available. 
Added: external/zlib/contrib/testzlib/testzlib.c ============================================================================== --- (empty file) +++ external/zlib/contrib/testzlib/testzlib.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,275 @@ +#include +#include +#include + +#include "zlib.h" + + +void MyDoMinus64(LARGE_INTEGER *R,LARGE_INTEGER A,LARGE_INTEGER B) +{ + R->HighPart = A.HighPart - B.HighPart; + if (A.LowPart >= B.LowPart) + R->LowPart = A.LowPart - B.LowPart; + else + { + R->LowPart = A.LowPart - B.LowPart; + R->HighPart --; + } +} + +#ifdef _M_X64 +// see http://msdn2.microsoft.com/library/twchhe95(en-us,vs.80).aspx for __rdtsc +unsigned __int64 __rdtsc(void); +void BeginCountRdtsc(LARGE_INTEGER * pbeginTime64) +{ + // printf("rdtsc = %I64x\n",__rdtsc()); + pbeginTime64->QuadPart=__rdtsc(); +} + +LARGE_INTEGER GetResRdtsc(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf) +{ + LARGE_INTEGER LIres; + unsigned _int64 res=__rdtsc()-((unsigned _int64)(beginTime64.QuadPart)); + LIres.QuadPart=res; + // printf("rdtsc = %I64x\n",__rdtsc()); + return LIres; +} +#else +#ifdef _M_IX86 +void myGetRDTSC32(LARGE_INTEGER * pbeginTime64) +{ + DWORD dwEdx,dwEax; + _asm + { + rdtsc + mov dwEax,eax + mov dwEdx,edx + } + pbeginTime64->LowPart=dwEax; + pbeginTime64->HighPart=dwEdx; +} + +void BeginCountRdtsc(LARGE_INTEGER * pbeginTime64) +{ + myGetRDTSC32(pbeginTime64); +} + +LARGE_INTEGER GetResRdtsc(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf) +{ + LARGE_INTEGER LIres,endTime64; + myGetRDTSC32(&endTime64); + + LIres.LowPart=LIres.HighPart=0; + MyDoMinus64(&LIres,endTime64,beginTime64); + return LIres; +} +#else +void myGetRDTSC32(LARGE_INTEGER * pbeginTime64) +{ +} + +void BeginCountRdtsc(LARGE_INTEGER * pbeginTime64) +{ +} + +LARGE_INTEGER GetResRdtsc(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf) +{ + LARGE_INTEGER lr; + lr.QuadPart=0; + return lr; +} +#endif +#endif + +void BeginCountPerfCounter(LARGE_INTEGER * pbeginTime64,BOOL fComputeTimeQueryPerf) +{ + 
if ((!fComputeTimeQueryPerf) || (!QueryPerformanceCounter(pbeginTime64))) + { + pbeginTime64->LowPart = GetTickCount(); + pbeginTime64->HighPart = 0; + } +} + +DWORD GetMsecSincePerfCounter(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf) +{ + LARGE_INTEGER endTime64,ticksPerSecond,ticks; + DWORDLONG ticksShifted,tickSecShifted; + DWORD dwLog=16+0; + DWORD dwRet; + if ((!fComputeTimeQueryPerf) || (!QueryPerformanceCounter(&endTime64))) + dwRet = (GetTickCount() - beginTime64.LowPart)*1; + else + { + MyDoMinus64(&ticks,endTime64,beginTime64); + QueryPerformanceFrequency(&ticksPerSecond); + + + { + ticksShifted = Int64ShrlMod32(*(DWORDLONG*)&ticks,dwLog); + tickSecShifted = Int64ShrlMod32(*(DWORDLONG*)&ticksPerSecond,dwLog); + + } + + dwRet = (DWORD)((((DWORD)ticksShifted)*1000)/(DWORD)(tickSecShifted)); + dwRet *=1; + } + return dwRet; +} + +int ReadFileMemory(const char* filename,long* plFileSize,void** pFilePtr) +{ + FILE* stream; + void* ptr; + int retVal=1; + stream=fopen(filename, "rb"); + if (stream==NULL) + return 0; + + fseek(stream,0,SEEK_END); + + *plFileSize=ftell(stream); + fseek(stream,0,SEEK_SET); + ptr=malloc((*plFileSize)+1); + if (ptr==NULL) + retVal=0; + else + { + if (fread(ptr, 1, *plFileSize,stream) != (*plFileSize)) + retVal=0; + } + fclose(stream); + *pFilePtr=ptr; + return retVal; +} + +int main(int argc, char *argv[]) +{ + int BlockSizeCompress=0x8000; + int BlockSizeUncompress=0x8000; + int cprLevel=Z_DEFAULT_COMPRESSION ; + long lFileSize; + unsigned char* FilePtr; + long lBufferSizeCpr; + long lBufferSizeUncpr; + long lCompressedSize=0; + unsigned char* CprPtr; + unsigned char* UncprPtr; + long lSizeCpr,lSizeUncpr; + DWORD dwGetTick,dwMsecQP; + LARGE_INTEGER li_qp,li_rdtsc,dwResRdtsc; + + if (argc<=1) + { + printf("run TestZlib [BlockSizeCompress] [BlockSizeUncompress] [compres. 
level]\n"); + return 0; + } + + if (ReadFileMemory(argv[1],&lFileSize,&FilePtr)==0) + { + printf("error reading %s\n",argv[1]); + return 1; + } + else printf("file %s read, %u bytes\n",argv[1],lFileSize); + + if (argc>=3) + BlockSizeCompress=atol(argv[2]); + + if (argc>=4) + BlockSizeUncompress=atol(argv[3]); + + if (argc>=5) + cprLevel=(int)atol(argv[4]); + + lBufferSizeCpr = lFileSize + (lFileSize/0x10) + 0x200; + lBufferSizeUncpr = lBufferSizeCpr; + + CprPtr=(unsigned char*)malloc(lBufferSizeCpr + BlockSizeCompress); + + BeginCountPerfCounter(&li_qp,TRUE); + dwGetTick=GetTickCount(); + BeginCountRdtsc(&li_rdtsc); + { + z_stream zcpr; + int ret=Z_OK; + long lOrigToDo = lFileSize; + long lOrigDone = 0; + int step=0; + memset(&zcpr,0,sizeof(z_stream)); + deflateInit(&zcpr,cprLevel); + + zcpr.next_in = FilePtr; + zcpr.next_out = CprPtr; + + + do + { + long all_read_before = zcpr.total_in; + zcpr.avail_in = min(lOrigToDo,BlockSizeCompress); + zcpr.avail_out = BlockSizeCompress; + ret=deflate(&zcpr,(zcpr.avail_in==lOrigToDo) ? 
Z_FINISH : Z_SYNC_FLUSH); + lOrigDone += (zcpr.total_in-all_read_before); + lOrigToDo -= (zcpr.total_in-all_read_before); + step++; + } while (ret==Z_OK); + + lSizeCpr=zcpr.total_out; + deflateEnd(&zcpr); + dwGetTick=GetTickCount()-dwGetTick; + dwMsecQP=GetMsecSincePerfCounter(li_qp,TRUE); + dwResRdtsc=GetResRdtsc(li_rdtsc,TRUE); + printf("total compress size = %u, in %u step\n",lSizeCpr,step); + printf("time = %u msec = %f sec\n",dwGetTick,dwGetTick/(double)1000.); + printf("defcpr time QP = %u msec = %f sec\n",dwMsecQP,dwMsecQP/(double)1000.); + printf("defcpr result rdtsc = %I64x\n\n",dwResRdtsc.QuadPart); + } + + CprPtr=(unsigned char*)realloc(CprPtr,lSizeCpr); + UncprPtr=(unsigned char*)malloc(lBufferSizeUncpr + BlockSizeUncompress); + + BeginCountPerfCounter(&li_qp,TRUE); + dwGetTick=GetTickCount(); + BeginCountRdtsc(&li_rdtsc); + { + z_stream zcpr; + int ret=Z_OK; + long lOrigToDo = lSizeCpr; + long lOrigDone = 0; + int step=0; + memset(&zcpr,0,sizeof(z_stream)); + inflateInit(&zcpr); + + zcpr.next_in = CprPtr; + zcpr.next_out = UncprPtr; + + + do + { + long all_read_before = zcpr.total_in; + zcpr.avail_in = min(lOrigToDo,BlockSizeUncompress); + zcpr.avail_out = BlockSizeUncompress; + ret=inflate(&zcpr,Z_SYNC_FLUSH); + lOrigDone += (zcpr.total_in-all_read_before); + lOrigToDo -= (zcpr.total_in-all_read_before); + step++; + } while (ret==Z_OK); + + lSizeUncpr=zcpr.total_out; + inflateEnd(&zcpr); + dwGetTick=GetTickCount()-dwGetTick; + dwMsecQP=GetMsecSincePerfCounter(li_qp,TRUE); + dwResRdtsc=GetResRdtsc(li_rdtsc,TRUE); + printf("total uncompress size = %u, in %u step\n",lSizeUncpr,step); + printf("time = %u msec = %f sec\n",dwGetTick,dwGetTick/(double)1000.); + printf("uncpr time QP = %u msec = %f sec\n",dwMsecQP,dwMsecQP/(double)1000.); + printf("uncpr result rdtsc = %I64x\n\n",dwResRdtsc.QuadPart); + } + + if (lSizeUncpr==lFileSize) + { + if (memcmp(FilePtr,UncprPtr,lFileSize)==0) + printf("compare ok\n"); + + } + + return 0; +} Added: 
external/zlib/contrib/testzlib/testzlib.txt ============================================================================== --- (empty file) +++ external/zlib/contrib/testzlib/testzlib.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,10 @@ +To build testzLib with Visual Studio 2005: + +copy to a directory file from : +- root of zLib tree +- contrib/testzlib +- contrib/masmx86 +- contrib/masmx64 +- contrib/vstudio/vc7 + +and open testzlib8.sln \ No newline at end of file Added: external/zlib/contrib/untgz/Makefile ============================================================================== --- (empty file) +++ external/zlib/contrib/untgz/Makefile Tue Jan 3 07:42:59 2006 @@ -0,0 +1,14 @@ +CC=cc +CFLAGS=-g + +untgz: untgz.o ../../libz.a + $(CC) $(CFLAGS) -o untgz untgz.o -L../.. -lz + +untgz.o: untgz.c ../../zlib.h + $(CC) $(CFLAGS) -c -I../.. untgz.c + +../../libz.a: + cd ../..; ./configure; make + +clean: + rm -f untgz untgz.o *~ Added: external/zlib/contrib/untgz/Makefile.msc ============================================================================== --- (empty file) +++ external/zlib/contrib/untgz/Makefile.msc Tue Jan 3 07:42:59 2006 @@ -0,0 +1,17 @@ +CC=cl +CFLAGS=-MD + +untgz.exe: untgz.obj ..\..\zlib.lib + $(CC) $(CFLAGS) untgz.obj ..\..\zlib.lib + +untgz.obj: untgz.c ..\..\zlib.h + $(CC) $(CFLAGS) -c -I..\.. untgz.c + +..\..\zlib.lib: + cd ..\.. + $(MAKE) -f win32\makefile.msc + cd contrib\untgz + +clean: + -del untgz.obj + -del untgz.exe Added: external/zlib/contrib/untgz/untgz.c ============================================================================== --- (empty file) +++ external/zlib/contrib/untgz/untgz.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,674 @@ +/* + * untgz.c -- Display contents and extract files from a gzip'd TAR file + * + * written by Pedro A. 
Aranda Gutierrez + * adaptation to Unix by Jean-loup Gailly + * various fixes by Cosmin Truta + */ + +#include +#include +#include +#include +#include + +#include "zlib.h" + +#ifdef unix +# include +#else +# include +# include +#endif + +#ifdef WIN32 +#include +# ifndef F_OK +# define F_OK 0 +# endif +# define mkdir(dirname,mode) _mkdir(dirname) +# ifdef _MSC_VER +# define access(path,mode) _access(path,mode) +# define chmod(path,mode) _chmod(path,mode) +# define strdup(str) _strdup(str) +# endif +#else +# include +#endif + + +/* values used in typeflag field */ + +#define REGTYPE '0' /* regular file */ +#define AREGTYPE '\0' /* regular file */ +#define LNKTYPE '1' /* link */ +#define SYMTYPE '2' /* reserved */ +#define CHRTYPE '3' /* character special */ +#define BLKTYPE '4' /* block special */ +#define DIRTYPE '5' /* directory */ +#define FIFOTYPE '6' /* FIFO special */ +#define CONTTYPE '7' /* reserved */ + +/* GNU tar extensions */ + +#define GNUTYPE_DUMPDIR 'D' /* file names from dumped directory */ +#define GNUTYPE_LONGLINK 'K' /* long link name */ +#define GNUTYPE_LONGNAME 'L' /* long file name */ +#define GNUTYPE_MULTIVOL 'M' /* continuation of file from another volume */ +#define GNUTYPE_NAMES 'N' /* file name that does not fit into main hdr */ +#define GNUTYPE_SPARSE 'S' /* sparse file */ +#define GNUTYPE_VOLHDR 'V' /* tape/volume header */ + + +/* tar header */ + +#define BLOCKSIZE 512 +#define SHORTNAMESIZE 100 + +struct tar_header +{ /* byte offset */ + char name[100]; /* 0 */ + char mode[8]; /* 100 */ + char uid[8]; /* 108 */ + char gid[8]; /* 116 */ + char size[12]; /* 124 */ + char mtime[12]; /* 136 */ + char chksum[8]; /* 148 */ + char typeflag; /* 156 */ + char linkname[100]; /* 157 */ + char magic[6]; /* 257 */ + char version[2]; /* 263 */ + char uname[32]; /* 265 */ + char gname[32]; /* 297 */ + char devmajor[8]; /* 329 */ + char devminor[8]; /* 337 */ + char prefix[155]; /* 345 */ + /* 500 */ +}; + +union tar_buffer +{ + char buffer[BLOCKSIZE]; 
+ struct tar_header header; +}; + +struct attr_item +{ + struct attr_item *next; + char *fname; + int mode; + time_t time; +}; + +enum { TGZ_EXTRACT, TGZ_LIST, TGZ_INVALID }; + +char *TGZfname OF((const char *)); +void TGZnotfound OF((const char *)); + +int getoct OF((char *, int)); +char *strtime OF((time_t *)); +int setfiletime OF((char *, time_t)); +void push_attr OF((struct attr_item **, char *, int, time_t)); +void restore_attr OF((struct attr_item **)); + +int ExprMatch OF((char *, char *)); + +int makedir OF((char *)); +int matchname OF((int, int, char **, char *)); + +void error OF((const char *)); +int tar OF((gzFile, int, int, int, char **)); + +void help OF((int)); +int main OF((int, char **)); + +char *prog; + +const char *TGZsuffix[] = { "\0", ".tar", ".tar.gz", ".taz", ".tgz", NULL }; + +/* return the file name of the TGZ archive */ +/* or NULL if it does not exist */ + +char *TGZfname (const char *arcname) +{ + static char buffer[1024]; + int origlen,i; + + strcpy(buffer,arcname); + origlen = strlen(buffer); + + for (i=0; TGZsuffix[i]; i++) + { + strcpy(buffer+origlen,TGZsuffix[i]); + if (access(buffer,F_OK) == 0) + return buffer; + } + return NULL; +} + + +/* error message for the filename */ + +void TGZnotfound (const char *arcname) +{ + int i; + + fprintf(stderr,"%s: Couldn't find ",prog); + for (i=0;TGZsuffix[i];i++) + fprintf(stderr,(TGZsuffix[i+1]) ? 
"%s%s, " : "or %s%s\n", + arcname, + TGZsuffix[i]); + exit(1); +} + + +/* convert octal digits to int */ +/* on error return -1 */ + +int getoct (char *p,int width) +{ + int result = 0; + char c; + + while (width--) + { + c = *p++; + if (c == 0) + break; + if (c == ' ') + continue; + if (c < '0' || c > '7') + return -1; + result = result * 8 + (c - '0'); + } + return result; +} + + +/* convert time_t to string */ +/* use the "YYYY/MM/DD hh:mm:ss" format */ + +char *strtime (time_t *t) +{ + struct tm *local; + static char result[32]; + + local = localtime(t); + sprintf(result,"%4d/%02d/%02d %02d:%02d:%02d", + local->tm_year+1900, local->tm_mon+1, local->tm_mday, + local->tm_hour, local->tm_min, local->tm_sec); + return result; +} + + +/* set file time */ + +int setfiletime (char *fname,time_t ftime) +{ +#ifdef WIN32 + static int isWinNT = -1; + SYSTEMTIME st; + FILETIME locft, modft; + struct tm *loctm; + HANDLE hFile; + int result; + + loctm = localtime(&ftime); + if (loctm == NULL) + return -1; + + st.wYear = (WORD)loctm->tm_year + 1900; + st.wMonth = (WORD)loctm->tm_mon + 1; + st.wDayOfWeek = (WORD)loctm->tm_wday; + st.wDay = (WORD)loctm->tm_mday; + st.wHour = (WORD)loctm->tm_hour; + st.wMinute = (WORD)loctm->tm_min; + st.wSecond = (WORD)loctm->tm_sec; + st.wMilliseconds = 0; + if (!SystemTimeToFileTime(&st, &locft) || + !LocalFileTimeToFileTime(&locft, &modft)) + return -1; + + if (isWinNT < 0) + isWinNT = (GetVersion() < 0x80000000) ? 1 : 0; + hFile = CreateFile(fname, GENERIC_WRITE, 0, NULL, OPEN_EXISTING, + (isWinNT ? FILE_FLAG_BACKUP_SEMANTICS : 0), + NULL); + if (hFile == INVALID_HANDLE_VALUE) + return -1; + result = SetFileTime(hFile, NULL, NULL, &modft) ? 
0 : -1; + CloseHandle(hFile); + return result; +#else + struct utimbuf settime; + + settime.actime = settime.modtime = ftime; + return utime(fname,&settime); +#endif +} + + +/* push file attributes */ + +void push_attr(struct attr_item **list,char *fname,int mode,time_t time) +{ + struct attr_item *item; + + item = (struct attr_item *)malloc(sizeof(struct attr_item)); + if (item == NULL) + error("Out of memory"); + item->fname = strdup(fname); + item->mode = mode; + item->time = time; + item->next = *list; + *list = item; +} + + +/* restore file attributes */ + +void restore_attr(struct attr_item **list) +{ + struct attr_item *item, *prev; + + for (item = *list; item != NULL; ) + { + setfiletime(item->fname,item->time); + chmod(item->fname,item->mode); + prev = item; + item = item->next; + free(prev); + } + *list = NULL; +} + + +/* match regular expression */ + +#define ISSPECIAL(c) (((c) == '*') || ((c) == '/')) + +int ExprMatch (char *string,char *expr) +{ + while (1) + { + if (ISSPECIAL(*expr)) + { + if (*expr == '/') + { + if (*string != '\\' && *string != '/') + return 0; + string ++; expr++; + } + else if (*expr == '*') + { + if (*expr ++ == 0) + return 1; + while (*++string != *expr) + if (*string == 0) + return 0; + } + } + else + { + if (*string != *expr) + return 0; + if (*expr++ == 0) + return 1; + string++; + } + } +} + + +/* recursive mkdir */ +/* abort on ENOENT; ignore other errors like "directory already exists" */ +/* return 1 if OK */ +/* 0 on error */ + +int makedir (char *newdir) +{ + char *buffer = strdup(newdir); + char *p; + int len = strlen(buffer); + + if (len <= 0) { + free(buffer); + return 0; + } + if (buffer[len-1] == '/') { + buffer[len-1] = '\0'; + } + if (mkdir(buffer, 0755) == 0) + { + free(buffer); + return 1; + } + + p = buffer+1; + while (1) + { + char hold; + + while(*p && *p != '\\' && *p != '/') + p++; + hold = *p; + *p = 0; + if ((mkdir(buffer, 0755) == -1) && (errno == ENOENT)) + { + fprintf(stderr,"%s: Couldn't create 
directory %s\n",prog,buffer); + free(buffer); + return 0; + } + if (hold == 0) + break; + *p++ = hold; + } + free(buffer); + return 1; +} + + +int matchname (int arg,int argc,char **argv,char *fname) +{ + if (arg == argc) /* no arguments given (untgz tgzarchive) */ + return 1; + + while (arg < argc) + if (ExprMatch(fname,argv[arg++])) + return 1; + + return 0; /* ignore this for the moment being */ +} + + +/* tar file list or extract */ + +int tar (gzFile in,int action,int arg,int argc,char **argv) +{ + union tar_buffer buffer; + int len; + int err; + int getheader = 1; + int remaining = 0; + FILE *outfile = NULL; + char fname[BLOCKSIZE]; + int tarmode; + time_t tartime; + struct attr_item *attributes = NULL; + + if (action == TGZ_LIST) + printf(" date time size file\n" + " ---------- -------- --------- -------------------------------------\n"); + while (1) + { + len = gzread(in, &buffer, BLOCKSIZE); + if (len < 0) + error(gzerror(in, &err)); + /* + * Always expect complete blocks to process + * the tar information. 
+ */ + if (len != BLOCKSIZE) + { + action = TGZ_INVALID; /* force error exit */ + remaining = 0; /* force I/O cleanup */ + } + + /* + * If we have to get a tar header + */ + if (getheader >= 1) + { + /* + * if we met the end of the tar + * or the end-of-tar block, + * we are done + */ + if (len == 0 || buffer.header.name[0] == 0) + break; + + tarmode = getoct(buffer.header.mode,8); + tartime = (time_t)getoct(buffer.header.mtime,12); + if (tarmode == -1 || tartime == (time_t)-1) + { + buffer.header.name[0] = 0; + action = TGZ_INVALID; + } + + if (getheader == 1) + { + strncpy(fname,buffer.header.name,SHORTNAMESIZE); + if (fname[SHORTNAMESIZE-1] != 0) + fname[SHORTNAMESIZE] = 0; + } + else + { + /* + * The file name is longer than SHORTNAMESIZE + */ + if (strncmp(fname,buffer.header.name,SHORTNAMESIZE-1) != 0) + error("bad long name"); + getheader = 1; + } + + /* + * Act according to the type flag + */ + switch (buffer.header.typeflag) + { + case DIRTYPE: + if (action == TGZ_LIST) + printf(" %s %s\n",strtime(&tartime),fname); + if (action == TGZ_EXTRACT) + { + makedir(fname); + push_attr(&attributes,fname,tarmode,tartime); + } + break; + case REGTYPE: + case AREGTYPE: + remaining = getoct(buffer.header.size,12); + if (remaining == -1) + { + action = TGZ_INVALID; + break; + } + if (action == TGZ_LIST) + printf(" %s %9d %s\n",strtime(&tartime),remaining,fname); + else if (action == TGZ_EXTRACT) + { + if (matchname(arg,argc,argv,fname)) + { + outfile = fopen(fname,"wb"); + if (outfile == NULL) { + /* try creating directory */ + char *p = strrchr(fname, '/'); + if (p != NULL) { + *p = '\0'; + makedir(fname); + *p = '/'; + outfile = fopen(fname,"wb"); + } + } + if (outfile != NULL) + printf("Extracting %s\n",fname); + else + fprintf(stderr, "%s: Couldn't create %s",prog,fname); + } + else + outfile = NULL; + } + getheader = 0; + break; + case GNUTYPE_LONGLINK: + case GNUTYPE_LONGNAME: + remaining = getoct(buffer.header.size,12); + if (remaining < 0 || remaining >= 
BLOCKSIZE) + { + action = TGZ_INVALID; + break; + } + len = gzread(in, fname, BLOCKSIZE); + if (len < 0) + error(gzerror(in, &err)); + if (fname[BLOCKSIZE-1] != 0 || (int)strlen(fname) > remaining) + { + action = TGZ_INVALID; + break; + } + getheader = 2; + break; + default: + if (action == TGZ_LIST) + printf(" %s <---> %s\n",strtime(&tartime),fname); + break; + } + } + else + { + unsigned int bytes = (remaining > BLOCKSIZE) ? BLOCKSIZE : remaining; + + if (outfile != NULL) + { + if (fwrite(&buffer,sizeof(char),bytes,outfile) != bytes) + { + fprintf(stderr, + "%s: Error writing %s -- skipping\n",prog,fname); + fclose(outfile); + outfile = NULL; + remove(fname); + } + } + remaining -= bytes; + } + + if (remaining == 0) + { + getheader = 1; + if (outfile != NULL) + { + fclose(outfile); + outfile = NULL; + if (action != TGZ_INVALID) + push_attr(&attributes,fname,tarmode,tartime); + } + } + + /* + * Abandon if errors are found + */ + if (action == TGZ_INVALID) + { + error("broken archive"); + break; + } + } + + /* + * Restore file modes and time stamps + */ + restore_attr(&attributes); + + if (gzclose(in) != Z_OK) + error("failed gzclose"); + + return 0; +} + + +/* ============================================================ */ + +void help(int exitval) +{ + printf("untgz version 0.2.1\n" + " using zlib version %s\n\n", + zlibVersion()); + printf("Usage: untgz file.tgz extract all files\n" + " untgz file.tgz fname ... 
extract selected files\n" + " untgz -l file.tgz list archive contents\n" + " untgz -h display this help\n"); + exit(exitval); +} + +void error(const char *msg) +{ + fprintf(stderr, "%s: %s\n", prog, msg); + exit(1); +} + + +/* ============================================================ */ + +#if defined(WIN32) && defined(__GNUC__) +int _CRT_glob = 0; /* disable argument globbing in MinGW */ +#endif + +int main(int argc,char **argv) +{ + int action = TGZ_EXTRACT; + int arg = 1; + char *TGZfile; + gzFile *f; + + prog = strrchr(argv[0],'\\'); + if (prog == NULL) + { + prog = strrchr(argv[0],'/'); + if (prog == NULL) + { + prog = strrchr(argv[0],':'); + if (prog == NULL) + prog = argv[0]; + else + prog++; + } + else + prog++; + } + else + prog++; + + if (argc == 1) + help(0); + + if (strcmp(argv[arg],"-l") == 0) + { + action = TGZ_LIST; + if (argc == ++arg) + help(0); + } + else if (strcmp(argv[arg],"-h") == 0) + { + help(0); + } + + if ((TGZfile = TGZfname(argv[arg])) == NULL) + TGZnotfound(argv[arg]); + + ++arg; + if ((action == TGZ_LIST) && (arg != argc)) + help(1); + +/* + * Process the TGZ file + */ + switch(action) + { + case TGZ_LIST: + case TGZ_EXTRACT: + f = gzopen(TGZfile,"rb"); + if (f == NULL) + { + fprintf(stderr,"%s: Couldn't gzopen %s\n",prog,TGZfile); + return 1; + } + exit(tar(f, action, arg, argc, argv)); + break; + + default: + error("Unknown option"); + exit(1); + } + + return 0; +} Added: external/zlib/contrib/vstudio/readme.txt ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/readme.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,73 @@ +Building instructions for the DLL versions of Zlib 1.2.3 +======================================================== + +This directory contains projects that build zlib and minizip using +Microsoft Visual C++ 7.0/7.1, and Visual C++ . + +You don't need to build these projects yourself. 
You can download the +binaries from: + http://www.winimage.com/zLibDll + +More information can be found at this site. + + +Build instructions for Visual Studio 7.x (32 bits) +-------------------------------------------------- +- Uncompress current zlib, including all contrib/* files +- Download the crtdll library from + http://www.winimage.com/zLibDll/crtdll.zip + Unzip crtdll.zip to extract crtdll.lib into contrib\vstudio\vc7. +- Open contrib\vstudio\vc7\zlibvc.sln with Microsoft Visual C++ 7.x + (Visual Studio .Net 2002 or 2003). + +Build instructions for Visual Studio 2005 (32 bits or 64 bits) +-------------------------------------------------------------- +- Uncompress current zlib, including all contrib/* files +- For 32 bits only: download the crtdll library from + http://www.winimage.com/zLibDll/crtdll.zip + Unzip crtdll.zip to extract crtdll.lib into contrib\vstudio\vc8. +- Open contrib\vstudio\vc8\zlibvc.sln with Microsoft Visual C++ 8.0 + +Build instructions for Visual Studio 2005 64 bits, PSDK compiler +---------------------------------------------------------------- +At the time of writing this text file, Visual Studio 2005 (and + Microsoft Visual C++ 8.0) is at the beta 2 stage. +You can get the free 64-bit compiler from the Platform SDK, + which is NOT a beta, and compile using the Visual Studio 2005 IDE; +see http://www.winimage.com/misc/sdk64onvs2005/ for instructions. + +- Uncompress current zlib, including all contrib/* files +- Start Visual Studio 2005 from a Platform SDK command prompt, using + the /useenv switch +- Open contrib\vstudio\vc8\zlibvc.sln with Microsoft Visual C++ 8.0 + + +Important +--------- +- To use zlibwapi.dll in your application, you must define the + macro ZLIB_WINAPI when compiling your application's source files. 
+ + +Additional notes +---------------- +- This DLL, named zlibwapi.dll, is compatible to the old zlib.dll built + by Gilles Vollant from the zlib 1.1.x sources, and distributed at + http://www.winimage.com/zLibDll + It uses the WINAPI calling convention for the exported functions, and + includes the minizip functionality. If your application needs that + particular build of zlib.dll, you can rename zlibwapi.dll to zlib.dll. + +- The new DLL was renamed because there exist several incompatible + versions of zlib.dll on the Internet. + +- There is also an official DLL build of zlib, named zlib1.dll. This one + is exporting the functions using the CDECL convention. See the file + win32\DLL_FAQ.txt found in this zlib distribution. + +- There used to be a ZLIB_DLL macro in zlib 1.1.x, but now this symbol + has a slightly different effect. To avoid compatibility problems, do + not define it here. + + +Gilles Vollant +info at winimage.com Added: external/zlib/contrib/vstudio/vc7/miniunz.vcproj ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc7/miniunz.vcproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,126 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: external/zlib/contrib/vstudio/vc7/minizip.vcproj ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc7/minizip.vcproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,126 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: external/zlib/contrib/vstudio/vc7/testzlib.vcproj ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc7/testzlib.vcproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,126 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: 
external/zlib/contrib/vstudio/vc7/zlib.rc ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc7/zlib.rc Tue Jan 3 07:42:59 2006 @@ -0,0 +1,32 @@ +#include + +#define IDR_VERSION1 1 +IDR_VERSION1 VERSIONINFO MOVEABLE IMPURE LOADONCALL DISCARDABLE + FILEVERSION 1,2,3,0 + PRODUCTVERSION 1,2,3,0 + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK + FILEFLAGS 0 + FILEOS VOS_DOS_WINDOWS32 + FILETYPE VFT_DLL + FILESUBTYPE 0 // not used +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904E4" + //language ID = U.S. English, char set = Windows, Multilingual + + BEGIN + VALUE "FileDescription", "zlib data compression library\0" + VALUE "FileVersion", "1.2.3.0\0" + VALUE "InternalName", "zlib\0" + VALUE "OriginalFilename", "zlib.dll\0" + VALUE "ProductName", "ZLib.DLL\0" + VALUE "Comments","DLL support by Alessandro Iacopetti & Gilles Vollant\0" + VALUE "LegalCopyright", "(C) 1995-2003 Jean-loup Gailly & Mark Adler\0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x0409, 1252 + END +END Added: external/zlib/contrib/vstudio/vc7/zlibstat.vcproj ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc7/zlibstat.vcproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,246 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: external/zlib/contrib/vstudio/vc7/zlibvc.def ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc7/zlibvc.def Tue Jan 3 07:42:59 2006 @@ -0,0 +1,92 @@ + +VERSION 1.23 + +HEAPSIZE 1048576,8192 + +EXPORTS + adler32 @1 + compress @2 + crc32 @3 + deflate @4 + deflateCopy @5 + deflateEnd @6 + deflateInit2_ @7 + deflateInit_ @8 + deflateParams @9 + deflateReset @10 + deflateSetDictionary @11 + gzclose 
@12 + gzdopen @13 + gzerror @14 + gzflush @15 + gzopen @16 + gzread @17 + gzwrite @18 + inflate @19 + inflateEnd @20 + inflateInit2_ @21 + inflateInit_ @22 + inflateReset @23 + inflateSetDictionary @24 + inflateSync @25 + uncompress @26 + zlibVersion @27 + gzprintf @28 + gzputc @29 + gzgetc @30 + gzseek @31 + gzrewind @32 + gztell @33 + gzeof @34 + gzsetparams @35 + zError @36 + inflateSyncPoint @37 + get_crc_table @38 + compress2 @39 + gzputs @40 + gzgets @41 + inflateCopy @42 + inflateBackInit_ @43 + inflateBack @44 + inflateBackEnd @45 + compressBound @46 + deflateBound @47 + gzclearerr @48 + gzungetc @49 + zlibCompileFlags @50 + deflatePrime @51 + + unzOpen @61 + unzClose @62 + unzGetGlobalInfo @63 + unzGetCurrentFileInfo @64 + unzGoToFirstFile @65 + unzGoToNextFile @66 + unzOpenCurrentFile @67 + unzReadCurrentFile @68 + unzOpenCurrentFile3 @69 + unztell @70 + unzeof @71 + unzCloseCurrentFile @72 + unzGetGlobalComment @73 + unzStringFileNameCompare @74 + unzLocateFile @75 + unzGetLocalExtrafield @76 + unzOpen2 @77 + unzOpenCurrentFile2 @78 + unzOpenCurrentFilePassword @79 + + zipOpen @80 + zipOpenNewFileInZip @81 + zipWriteInFileInZip @82 + zipCloseFileInZip @83 + zipClose @84 + zipOpenNewFileInZip2 @86 + zipCloseFileInZipRaw @87 + zipOpen2 @88 + zipOpenNewFileInZip3 @89 + + unzGetFilePos @100 + unzGoToFilePos @101 + + fill_win32_filefunc @110 Added: external/zlib/contrib/vstudio/vc7/zlibvc.sln ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc7/zlibvc.sln Tue Jan 3 07:42:59 2006 @@ -0,0 +1,78 @@ +Microsoft Visual Studio Solution File, Format Version 7.00 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zlibstat", "zlibstat.vcproj", "{745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zlibvc", "zlibvc.vcproj", "{8FD826F8-3739-44E6-8CC8-997122E53B8D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = 
"minizip", "minizip.vcproj", "{48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "miniunz", "miniunz.vcproj", "{C52F9E7B-498A-42BE-8DB4-85A15694382A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testZlibDll", "testzlib.vcproj", "{AA6666AA-E09F-4135-9C0C-4FE50C3C654C}" +EndProject +Global + GlobalSection(SolutionConfiguration) = preSolution + ConfigName.0 = Debug + ConfigName.1 = Release + ConfigName.2 = ReleaseAxp + ConfigName.3 = ReleaseWithoutAsm + ConfigName.4 = ReleaseWithoutCrtdll + EndGlobalSection + GlobalSection(ProjectDependencies) = postSolution + EndGlobalSection + GlobalSection(ProjectConfiguration) = postSolution + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Debug.ActiveCfg = Debug|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Debug.Build.0 = Debug|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Release.ActiveCfg = Release|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Release.Build.0 = Release|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseAxp.ActiveCfg = ReleaseAxp|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseAxp.Build.0 = ReleaseAxp|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseWithoutAsm.ActiveCfg = ReleaseWithoutAsm|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseWithoutAsm.Build.0 = ReleaseWithoutAsm|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseWithoutCrtdll.ActiveCfg = ReleaseAxp|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseWithoutCrtdll.Build.0 = ReleaseAxp|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug.ActiveCfg = Debug|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug.Build.0 = Debug|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release.ActiveCfg = Release|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release.Build.0 = Release|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseAxp.ActiveCfg = ReleaseAxp|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseAxp.Build.0 = ReleaseAxp|Win32 + 
{8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm.ActiveCfg = ReleaseWithoutAsm|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm.Build.0 = ReleaseWithoutAsm|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutCrtdll.ActiveCfg = ReleaseWithoutCrtdll|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutCrtdll.Build.0 = ReleaseWithoutCrtdll|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Debug.ActiveCfg = Debug|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Debug.Build.0 = Debug|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Release.ActiveCfg = Release|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Release.Build.0 = Release|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseAxp.ActiveCfg = Release|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseAxp.Build.0 = Release|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm.ActiveCfg = Release|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm.Build.0 = Release|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutCrtdll.ActiveCfg = Release|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutCrtdll.Build.0 = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug.ActiveCfg = Debug|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug.Build.0 = Debug|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release.ActiveCfg = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release.Build.0 = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseAxp.ActiveCfg = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseAxp.Build.0 = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm.ActiveCfg = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm.Build.0 = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutCrtdll.ActiveCfg = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutCrtdll.Build.0 = Release|Win32 + 
{AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.Debug.ActiveCfg = Debug|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.Debug.Build.0 = Debug|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.Release.ActiveCfg = Release|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.Release.Build.0 = Release|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseAxp.ActiveCfg = Release|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseAxp.Build.0 = Release|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseWithoutAsm.ActiveCfg = Release|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseWithoutAsm.Build.0 = Release|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseWithoutCrtdll.ActiveCfg = Release|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseWithoutCrtdll.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + EndGlobalSection + GlobalSection(ExtensibilityAddIns) = postSolution + EndGlobalSection +EndGlobal Added: external/zlib/contrib/vstudio/vc7/zlibvc.vcproj ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc7/zlibvc.vcproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,445 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: external/zlib/contrib/vstudio/vc8/miniunz.vcproj ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc8/miniunz.vcproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,566 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + Added: external/zlib/contrib/vstudio/vc8/minizip.vcproj ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc8/minizip.vcproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,563 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: external/zlib/contrib/vstudio/vc8/testzlib.vcproj ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc8/testzlib.vcproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,948 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: external/zlib/contrib/vstudio/vc8/testzlibdll.vcproj ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc8/testzlibdll.vcproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,567 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: external/zlib/contrib/vstudio/vc8/zlib.rc 
============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc8/zlib.rc Tue Jan 3 07:42:59 2006 @@ -0,0 +1,32 @@ +#include + +#define IDR_VERSION1 1 +IDR_VERSION1 VERSIONINFO MOVEABLE IMPURE LOADONCALL DISCARDABLE + FILEVERSION 1,2,3,0 + PRODUCTVERSION 1,2,3,0 + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK + FILEFLAGS 0 + FILEOS VOS_DOS_WINDOWS32 + FILETYPE VFT_DLL + FILESUBTYPE 0 // not used +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904E4" + //language ID = U.S. English, char set = Windows, Multilingual + + BEGIN + VALUE "FileDescription", "zlib data compression library\0" + VALUE "FileVersion", "1.2.3.0\0" + VALUE "InternalName", "zlib\0" + VALUE "OriginalFilename", "zlib.dll\0" + VALUE "ProductName", "ZLib.DLL\0" + VALUE "Comments","DLL support by Alessandro Iacopetti & Gilles Vollant\0" + VALUE "LegalCopyright", "(C) 1995-2003 Jean-loup Gailly & Mark Adler\0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x0409, 1252 + END +END Added: external/zlib/contrib/vstudio/vc8/zlibstat.vcproj ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc8/zlibstat.vcproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,870 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: external/zlib/contrib/vstudio/vc8/zlibvc.def ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc8/zlibvc.def Tue Jan 3 07:42:59 2006 
@@ -0,0 +1,92 @@ + +VERSION 1.23 + +HEAPSIZE 1048576,8192 + +EXPORTS + adler32 @1 + compress @2 + crc32 @3 + deflate @4 + deflateCopy @5 + deflateEnd @6 + deflateInit2_ @7 + deflateInit_ @8 + deflateParams @9 + deflateReset @10 + deflateSetDictionary @11 + gzclose @12 + gzdopen @13 + gzerror @14 + gzflush @15 + gzopen @16 + gzread @17 + gzwrite @18 + inflate @19 + inflateEnd @20 + inflateInit2_ @21 + inflateInit_ @22 + inflateReset @23 + inflateSetDictionary @24 + inflateSync @25 + uncompress @26 + zlibVersion @27 + gzprintf @28 + gzputc @29 + gzgetc @30 + gzseek @31 + gzrewind @32 + gztell @33 + gzeof @34 + gzsetparams @35 + zError @36 + inflateSyncPoint @37 + get_crc_table @38 + compress2 @39 + gzputs @40 + gzgets @41 + inflateCopy @42 + inflateBackInit_ @43 + inflateBack @44 + inflateBackEnd @45 + compressBound @46 + deflateBound @47 + gzclearerr @48 + gzungetc @49 + zlibCompileFlags @50 + deflatePrime @51 + + unzOpen @61 + unzClose @62 + unzGetGlobalInfo @63 + unzGetCurrentFileInfo @64 + unzGoToFirstFile @65 + unzGoToNextFile @66 + unzOpenCurrentFile @67 + unzReadCurrentFile @68 + unzOpenCurrentFile3 @69 + unztell @70 + unzeof @71 + unzCloseCurrentFile @72 + unzGetGlobalComment @73 + unzStringFileNameCompare @74 + unzLocateFile @75 + unzGetLocalExtrafield @76 + unzOpen2 @77 + unzOpenCurrentFile2 @78 + unzOpenCurrentFilePassword @79 + + zipOpen @80 + zipOpenNewFileInZip @81 + zipWriteInFileInZip @82 + zipCloseFileInZip @83 + zipClose @84 + zipOpenNewFileInZip2 @86 + zipCloseFileInZipRaw @87 + zipOpen2 @88 + zipOpenNewFileInZip3 @89 + + unzGetFilePos @100 + unzGoToFilePos @101 + + fill_win32_filefunc @110 Added: external/zlib/contrib/vstudio/vc8/zlibvc.sln ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc8/zlibvc.sln Tue Jan 3 07:42:59 2006 @@ -0,0 +1,144 @@ + +Microsoft Visual Studio Solution File, Format Version 9.00 +# Visual Studio 2005 
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zlibvc", "zlibvc.vcproj", "{8FD826F8-3739-44E6-8CC8-997122E53B8D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zlibstat", "zlibstat.vcproj", "{745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testzlib", "testzlib.vcproj", "{AA6666AA-E09F-4135-9C0C-4FE50C3C654B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestZlibDll", "testzlibdll.vcproj", "{C52F9E7B-498A-42BE-8DB4-85A15694366A}" + ProjectSection(ProjectDependencies) = postProject + {8FD826F8-3739-44E6-8CC8-997122E53B8D} = {8FD826F8-3739-44E6-8CC8-997122E53B8D} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "minizip", "minizip.vcproj", "{48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}" + ProjectSection(ProjectDependencies) = postProject + {8FD826F8-3739-44E6-8CC8-997122E53B8D} = {8FD826F8-3739-44E6-8CC8-997122E53B8D} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "miniunz", "miniunz.vcproj", "{C52F9E7B-498A-42BE-8DB4-85A15694382A}" + ProjectSection(ProjectDependencies) = postProject + {8FD826F8-3739-44E6-8CC8-997122E53B8D} = {8FD826F8-3739-44E6-8CC8-997122E53B8D} + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Itanium = Debug|Itanium + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release|Itanium = Release|Itanium + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + ReleaseWithoutAsm|Itanium = ReleaseWithoutAsm|Itanium + ReleaseWithoutAsm|Win32 = ReleaseWithoutAsm|Win32 + ReleaseWithoutAsm|x64 = ReleaseWithoutAsm|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug|Itanium.ActiveCfg = Debug|Itanium + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug|Itanium.Build.0 = Debug|Itanium + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug|Win32.ActiveCfg = 
Debug|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug|Win32.Build.0 = Debug|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug|x64.ActiveCfg = Debug|x64 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug|x64.Build.0 = Debug|x64 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|Itanium.ActiveCfg = Release|Itanium + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|Itanium.Build.0 = Release|Itanium + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|Win32.ActiveCfg = Release|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|Win32.Build.0 = Release|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|x64.ActiveCfg = ReleaseWithoutAsm|x64 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|x64.Build.0 = ReleaseWithoutAsm|x64 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|Itanium.ActiveCfg = ReleaseWithoutAsm|Itanium + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|Itanium.Build.0 = ReleaseWithoutAsm|Itanium + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|Win32.ActiveCfg = ReleaseWithoutAsm|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|Win32.Build.0 = ReleaseWithoutAsm|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|x64.ActiveCfg = ReleaseWithoutAsm|x64 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|x64.Build.0 = ReleaseWithoutAsm|x64 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Debug|Itanium.ActiveCfg = Debug|Itanium + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Debug|Itanium.Build.0 = Debug|Itanium + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Debug|Win32.ActiveCfg = Debug|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Debug|Win32.Build.0 = Debug|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Debug|x64.ActiveCfg = Debug|x64 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Debug|x64.Build.0 = Debug|x64 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Release|Itanium.ActiveCfg = Release|Itanium + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Release|Itanium.Build.0 = Release|Itanium + 
{745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Release|Win32.ActiveCfg = Release|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Release|Win32.Build.0 = Release|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Release|x64.ActiveCfg = Release|x64 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.Release|x64.Build.0 = Release|x64 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseWithoutAsm|Itanium.ActiveCfg = ReleaseWithoutAsm|Itanium + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseWithoutAsm|Itanium.Build.0 = ReleaseWithoutAsm|Itanium + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseWithoutAsm|Win32.ActiveCfg = ReleaseWithoutAsm|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseWithoutAsm|Win32.Build.0 = ReleaseWithoutAsm|Win32 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseWithoutAsm|x64.ActiveCfg = ReleaseWithoutAsm|x64 + {745DEC58-EBB3-47A9-A9B8-4C6627C01BF8}.ReleaseWithoutAsm|x64.Build.0 = ReleaseWithoutAsm|x64 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Debug|Itanium.ActiveCfg = Debug|Itanium + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Debug|Itanium.Build.0 = Debug|Itanium + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Debug|Win32.ActiveCfg = Debug|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Debug|Win32.Build.0 = Debug|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Debug|x64.ActiveCfg = Debug|x64 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Debug|x64.Build.0 = Debug|x64 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Release|Itanium.ActiveCfg = Release|Itanium + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Release|Itanium.Build.0 = Release|Itanium + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Release|Win32.ActiveCfg = Release|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Release|Win32.Build.0 = Release|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Release|x64.ActiveCfg = Release|x64 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.Release|x64.Build.0 = Release|x64 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|Itanium.ActiveCfg = ReleaseWithoutAsm|Itanium + 
{AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|Itanium.Build.0 = ReleaseWithoutAsm|Itanium + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|Win32.ActiveCfg = ReleaseWithoutAsm|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|Win32.Build.0 = ReleaseWithoutAsm|Win32 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|x64.ActiveCfg = ReleaseWithoutAsm|x64 + {AA6666AA-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|x64.Build.0 = ReleaseWithoutAsm|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Debug|Itanium.ActiveCfg = Debug|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Debug|Itanium.Build.0 = Debug|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Debug|Win32.ActiveCfg = Debug|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Debug|Win32.Build.0 = Debug|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Debug|x64.ActiveCfg = Debug|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Debug|x64.Build.0 = Debug|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Release|Itanium.ActiveCfg = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Release|Itanium.Build.0 = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Release|Win32.ActiveCfg = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Release|Win32.Build.0 = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Release|x64.ActiveCfg = Release|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Release|x64.Build.0 = Release|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.ReleaseWithoutAsm|Itanium.ActiveCfg = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.ReleaseWithoutAsm|Itanium.Build.0 = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.ReleaseWithoutAsm|Win32.ActiveCfg = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.ReleaseWithoutAsm|x64.ActiveCfg = Release|Itanium + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Debug|Itanium.ActiveCfg = Debug|Itanium + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Debug|Itanium.Build.0 = Debug|Itanium + 
{48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Debug|Win32.ActiveCfg = Debug|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Debug|Win32.Build.0 = Debug|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Debug|x64.ActiveCfg = Debug|x64 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Debug|x64.Build.0 = Debug|x64 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Release|Itanium.ActiveCfg = Release|Itanium + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Release|Itanium.Build.0 = Release|Itanium + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Release|Win32.ActiveCfg = Release|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Release|Win32.Build.0 = Release|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Release|x64.ActiveCfg = Release|x64 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Release|x64.Build.0 = Release|x64 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|Itanium.ActiveCfg = Release|Itanium + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|Itanium.Build.0 = Release|Itanium + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|Win32.ActiveCfg = Release|Itanium + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|x64.ActiveCfg = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|Itanium.ActiveCfg = Debug|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|Itanium.Build.0 = Debug|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|Win32.ActiveCfg = Debug|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|Win32.Build.0 = Debug|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|x64.ActiveCfg = Debug|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|x64.Build.0 = Debug|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release|Itanium.ActiveCfg = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release|Itanium.Build.0 = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release|Win32.ActiveCfg = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release|Win32.Build.0 = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release|x64.ActiveCfg = 
Release|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release|x64.Build.0 = Release|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|Itanium.ActiveCfg = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|Itanium.Build.0 = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|Win32.ActiveCfg = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|x64.ActiveCfg = Release|Itanium + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal Added: external/zlib/contrib/vstudio/vc8/zlibvc.vcproj ============================================================================== --- (empty file) +++ external/zlib/contrib/vstudio/vc8/zlibvc.vcproj Tue Jan 3 07:42:59 2006 @@ -0,0 +1,1219 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: external/zlib/crc32.c ============================================================================== --- (empty file) +++ external/zlib/crc32.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,423 @@ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Thanks to Rodney Brown for his contribution of faster + * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing + * tables for updating the 
shift register in one step with three exclusive-ors + * instead of four steps with four exclusive-ors. This results in about a + * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. + */ + +/* @(#) $Id$ */ + +/* + Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore + protection on the static variables used to control the first-use generation + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + first call get_crc_table() to initialize the tables before allowing more than + one thread to use crc32(). + */ + +#ifdef MAKECRCH +# include <stdio.h> +# ifndef DYNAMIC_CRC_TABLE +# define DYNAMIC_CRC_TABLE +# endif /* !DYNAMIC_CRC_TABLE */ +#endif /* MAKECRCH */ + +#include "zutil.h" /* for STDC and FAR definitions */ + +#define local static + +/* Find a four-byte integer type for crc32_little() and crc32_big(). */ +#ifndef NOBYFOUR +# ifdef STDC /* need ANSI C limits.h to determine sizes */ +# include <limits.h> +# define BYFOUR +# if (UINT_MAX == 0xffffffffUL) + typedef unsigned int u4; +# else +# if (ULONG_MAX == 0xffffffffUL) + typedef unsigned long u4; +# else +# if (USHRT_MAX == 0xffffffffUL) + typedef unsigned short u4; +# else +# undef BYFOUR /* can't find a four-byte integer type! */ +# endif +# endif +# endif +# endif /* STDC */ +#endif /* !NOBYFOUR */ + +/* Definitions for doing the crc four data bytes at a time. 
*/ +#ifdef BYFOUR +# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \ + (((w)&0xff00)<<8)+(((w)&0xff)<<24)) + local unsigned long crc32_little OF((unsigned long, + const unsigned char FAR *, unsigned)); + local unsigned long crc32_big OF((unsigned long, + const unsigned char FAR *, unsigned)); +# define TBLS 8 +#else +# define TBLS 1 +#endif /* BYFOUR */ + +/* Local functions for crc concatenation */ +local unsigned long gf2_matrix_times OF((unsigned long *mat, + unsigned long vec)); +local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); + +#ifdef DYNAMIC_CRC_TABLE + +local volatile int crc_table_empty = 1; +local unsigned long FAR crc_table[TBLS][256]; +local void make_crc_table OF((void)); +#ifdef MAKECRCH + local void write_table OF((FILE *, const unsigned long FAR *)); +#endif /* MAKECRCH */ +/* + Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by + x (which is shifting right by one and adding x^32 mod p if the bit shifted + out is a one). We start with the highest power (least significant bit) of + q and repeat for all eight bits of q. 
+ + The first table is simply the CRC of all possible eight bit values. This is + all the information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. The remaining tables + allow for word-at-a-time CRC calculation for both big-endian and little- + endian machines, where a word is four bytes. +*/ +local void make_crc_table() +{ + unsigned long c; + int n, k; + unsigned long poly; /* polynomial exclusive-or pattern */ + /* terms of polynomial defining this crc (except x^32): */ + static volatile int first = 1; /* flag to limit concurrent making */ + static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* See if another task is already doing this (not thread-safe, but better + than nothing -- significantly reduces duration of vulnerability in + case the advice about DYNAMIC_CRC_TABLE is ignored) */ + if (first) { + first = 0; + + /* make exclusive-or pattern from polynomial (0xedb88320UL) */ + poly = 0UL; + for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++) + poly |= 1UL << (31 - p[n]); + + /* generate a crc for every 8-bit value */ + for (n = 0; n < 256; n++) { + c = (unsigned long)n; + for (k = 0; k < 8; k++) + c = c & 1 ? 
poly ^ (c >> 1) : c >> 1; + crc_table[0][n] = c; + } + +#ifdef BYFOUR + /* generate crc for each value followed by one, two, and three zeros, + and then the byte reversal of those as well as the first table */ + for (n = 0; n < 256; n++) { + c = crc_table[0][n]; + crc_table[4][n] = REV(c); + for (k = 1; k < 4; k++) { + c = crc_table[0][c & 0xff] ^ (c >> 8); + crc_table[k][n] = c; + crc_table[k + 4][n] = REV(c); + } + } +#endif /* BYFOUR */ + + crc_table_empty = 0; + } + else { /* not first */ + /* wait for the other guy to finish (not efficient, but rare) */ + while (crc_table_empty) + ; + } + +#ifdef MAKECRCH + /* write out CRC tables to crc32.h */ + { + FILE *out; + + out = fopen("crc32.h", "w"); + if (out == NULL) return; + fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); + fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); + fprintf(out, "local const unsigned long FAR "); + fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); + write_table(out, crc_table[0]); +# ifdef BYFOUR + fprintf(out, "#ifdef BYFOUR\n"); + for (k = 1; k < 8; k++) { + fprintf(out, " },\n {\n"); + write_table(out, crc_table[k]); + } + fprintf(out, "#endif\n"); +# endif /* BYFOUR */ + fprintf(out, " }\n};\n"); + fclose(out); + } +#endif /* MAKECRCH */ +} + +#ifdef MAKECRCH +local void write_table(out, table) + FILE *out; + const unsigned long FAR *table; +{ + int n; + + for (n = 0; n < 256; n++) + fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", table[n], + n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); +} +#endif /* MAKECRCH */ + +#else /* !DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Tables of CRC-32s of all single-byte values, made by make_crc_table(). 
+ */ +#include "crc32.h" +#endif /* DYNAMIC_CRC_TABLE */ + +/* ========================================================================= + * This function can be used by asm versions of crc32() + */ +const unsigned long FAR * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + return (const unsigned long FAR *)crc_table; +} + +/* ========================================================================= */ +#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + +/* ========================================================================= */ +unsigned long ZEXPORT crc32(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + if (buf == Z_NULL) return 0UL; + +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + +#ifdef BYFOUR + if (sizeof(void *) == sizeof(ptrdiff_t)) { + u4 endian; + + endian = 1; + if (*((unsigned char *)(&endian))) + return crc32_little(crc, buf, len); + else + return crc32_big(crc, buf, len); + } +#endif /* BYFOUR */ + crc = crc ^ 0xffffffffUL; + while (len >= 8) { + DO8; + len -= 8; + } + if (len) do { + DO1; + } while (--len); + return crc ^ 0xffffffffUL; +} + +#ifdef BYFOUR + +/* ========================================================================= */ +#define DOLIT4 c ^= *buf4++; \ + c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ + crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] +#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + +/* ========================================================================= */ +local unsigned long crc32_little(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + register u4 c; + register const u4 FAR *buf4; + + c = (u4)crc; + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = 
crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + len--; + } + + buf4 = (const u4 FAR *)(const void FAR *)buf; + while (len >= 32) { + DOLIT32; + len -= 32; + } + while (len >= 4) { + DOLIT4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + } while (--len); + c = ~c; + return (unsigned long)c; +} + +/* ========================================================================= */ +#define DOBIG4 c ^= *++buf4; \ + c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ + crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] +#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + +/* ========================================================================= */ +local unsigned long crc32_big(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + register u4 c; + register const u4 FAR *buf4; + + c = REV((u4)crc); + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + len--; + } + + buf4 = (const u4 FAR *)(const void FAR *)buf; + buf4--; + while (len >= 32) { + DOBIG32; + len -= 32; + } + while (len >= 4) { + DOBIG4; + len -= 4; + } + buf4++; + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + } while (--len); + c = ~c; + return (unsigned long)(REV(c)); +} + +#endif /* BYFOUR */ + +#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ + +/* ========================================================================= */ +local unsigned long gf2_matrix_times(mat, vec) + unsigned long *mat; + unsigned long vec; +{ + unsigned long sum; + + sum = 0; + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + return sum; +} + +/* ========================================================================= */ +local void gf2_matrix_square(square, mat) + unsigned long *square; + unsigned long *mat; +{ + int n; 
+ + for (n = 0; n < GF2_DIM; n++) + square[n] = gf2_matrix_times(mat, mat[n]); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off_t len2; +{ + int n; + unsigned long row; + unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ + unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ + + /* degenerate case */ + if (len2 == 0) + return crc1; + + /* put operator for one zero bit in odd */ + odd[0] = 0xedb88320L; /* CRC-32 polynomial */ + row = 1; + for (n = 1; n < GF2_DIM; n++) { + odd[n] = row; + row <<= 1; + } + + /* put operator for two zero bits in even */ + gf2_matrix_square(even, odd); + + /* put operator for four zero bits in odd */ + gf2_matrix_square(odd, even); + + /* apply len2 zeros to crc1 (first square will put the operator for one + zero byte, eight zero bits, in even) */ + do { + /* apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if (len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + if (len2 == 0) + break; + + /* another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + } while (len2 != 0); + + /* return combined crc */ + crc1 ^= crc2; + return crc1; +} Added: external/zlib/crc32.h ============================================================================== --- (empty file) +++ external/zlib/crc32.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,441 @@ +/* crc32.h -- tables for rapid CRC calculation + * Generated automatically by crc32.c + */ + +local const unsigned long FAR crc_table[TBLS][256] = +{ + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 
0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 
0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +#ifdef BYFOUR + }, + { + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 
0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 
0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL + }, + { + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 
0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 
0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL + }, + { + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 
0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 
0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL + }, + { + 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, + 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, + 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, + 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, + 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, + 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, + 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, + 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, + 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, + 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, + 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, + 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, + 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, + 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, + 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, + 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, + 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, + 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, + 0xeab8be8bUL, 0x7c88b9fcUL, 
0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, + 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, + 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, + 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, + 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, + 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, + 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, + 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, + 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, + 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, + 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, + 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, + 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, + 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, + 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, + 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, + 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, + 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, + 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, + 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, + 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, + 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, + 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, + 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, + 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, + 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, + 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, + 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, + 
0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, + 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, + 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, + 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, + 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, + 0x8def022dUL + }, + { + 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, + 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, + 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, + 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, + 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, + 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, + 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, + 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, + 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, + 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, + 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, + 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, + 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, + 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, + 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, + 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, + 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, + 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, + 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, + 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, + 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, + 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, + 0x29467efbUL, 0x687765e2UL, 
0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, + 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, + 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, + 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, + 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, + 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, + 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, + 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, + 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, + 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, + 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, + 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, + 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, + 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, + 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, + 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, + 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, + 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, + 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, + 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, + 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, + 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, + 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, + 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, + 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, + 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, + 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, + 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, + 
0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, + 0x72fd2493UL + }, + { + 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, + 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, + 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, + 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, + 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, + 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, + 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, + 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, + 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, + 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, + 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, + 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, + 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, + 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, + 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, + 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, + 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, + 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, + 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, + 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, + 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, + 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, + 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, + 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, + 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, + 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, + 0xeecfb1e2UL, 0xd9a573e3UL, 
0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, + 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, + 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, + 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, + 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, + 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, + 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, + 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, + 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, + 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, + 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, + 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, + 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, + 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, + 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, + 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, + 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, + 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, + 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, + 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, + 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, + 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, + 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, + 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, + 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, + 0xed3498beUL + }, + { + 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, + 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, + 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 
0x3377dfe0UL, + 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, + 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, + 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, + 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, + 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, + 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, + 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, + 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, + 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, + 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, + 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, + 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, + 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, + 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, + 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, + 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, + 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, + 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, + 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, + 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, + 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, + 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, + 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, + 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, + 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, + 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, + 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, + 0x78be2b98UL, 0x1dd99720UL, 
0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, + 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, + 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, + 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, + 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, + 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, + 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, + 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, + 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, + 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, + 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, + 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, + 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, + 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, + 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, + 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, + 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, + 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, + 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, + 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, + 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, + 0xf10605deUL +#endif + } +}; Added: external/zlib/deflate.c ============================================================================== --- (empty file) +++ external/zlib/deflate.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,1736 @@ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-2005 Jean-loup Gailly. 
+ * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in http://www.ietf.org/rfc/rfc1951.txt + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. 
+ * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + */ + +/* @(#) $Id$ */ + +#include "deflate.h" + +const char deflate_copyright[] = + " deflate 1.2.3 Copyright 1995-2005 Jean-loup Gailly "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* =========================================================================== + * Function prototypes. + */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. 
*/ + +local void fill_window OF((deflate_state *s)); +local block_state deflate_stored OF((deflate_state *s, int flush)); +local block_state deflate_fast OF((deflate_state *s, int flush)); +#ifndef FASTEST +local block_state deflate_slow OF((deflate_state *s, int flush)); +#endif +local void lm_init OF((deflate_state *s)); +local void putShortMSB OF((deflate_state *s, uInt b)); +local void flush_pending OF((z_streamp strm)); +local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +#ifndef FASTEST +#ifdef ASMV + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +local uInt longest_match OF((deflate_state *s, IPos cur_match)); +#endif +#endif +local uInt longest_match_fast OF((deflate_state *s, IPos cur_match)); + +#ifdef DEBUG +local void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. 
+ */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; +} config; + +#ifdef FASTEST +local const config configuration_table[2] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ +#else +local const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ +#endif + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +#ifndef NO_DUMMY_DECL +struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ +#endif + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to to UPDATE_HASH are made with consecutive + * input characters, so that a running hash key can be computed from the + * previous key instead of complete recalculation each time. 
+ */ +#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to to INSERT_STRING are made with consecutive + * input characters and the first MIN_MATCH bytes of str are valid + * (except for the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef FASTEST +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#else +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#endif + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly.
+ */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* ========================================================================= */ +int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; + int level; + const char *version; + int stream_size; +{ + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + version, stream_size) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; + const char *version; + int stream_size; +{ + deflate_state *s; + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + + ushf *overlay; + /* We overlay pending_buf and d_buf+l_buf. This works since the average + * output size for (length,distance) codes is <= 24 bits. 
+ */ + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; + if (strm->zalloc == (alloc_func)0) { + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; + } + if (strm->zfree == (free_func)0) strm->zfree = zcfree; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + + if (windowBits < 0) { /* suppress zlib wrapper */ + wrap = 0; + windowBits = -windowBits; + } +#ifdef GZIP + else if (windowBits > 15) { + wrap = 2; /* write gzip wrapper instead */ + windowBits -= 16; + } +#endif + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + + s->wrap = wrap; + s->gzhead = Z_NULL; + s->w_bits = windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); + s->pending_buf = (uchf *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + s->status = FINISH_STATE; + strm->msg 
= (char*)ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= */ +int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + z_streamp strm; + const Bytef *dictionary; + uInt dictLength; +{ + deflate_state *s; + uInt length = dictLength; + uInt n; + IPos hash_head = 0; + + if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL || + strm->state->wrap == 2 || + (strm->state->wrap == 1 && strm->state->status != INIT_STATE)) + return Z_STREAM_ERROR; + + s = strm->state; + if (s->wrap) + strm->adler = adler32(strm->adler, dictionary, dictLength); + + if (length < MIN_MATCH) return Z_OK; + if (length > MAX_DIST(s)) { + length = MAX_DIST(s); + dictionary += dictLength - length; /* use the tail of the dictionary */ + } + zmemcpy(s->window, dictionary, length); + s->strstart = length; + s->block_start = (long)length; + + /* Insert all strings in the hash table (except for the last two bytes). + * s->lookahead stays null, so s->ins_h will be recomputed at the next + * call of fill_window. 
+ */ + s->ins_h = s->window[0]; + UPDATE_HASH(s, s->ins_h, s->window[1]); + for (n = 0; n <= length - MIN_MATCH; n++) { + INSERT_STRING(s, n, hash_head); + } + if (hash_head) hash_head = 0; /* to make compiler happy */ + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) { + return Z_STREAM_ERROR; + } + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->wrap < 0) { + s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ + } + s->status = s->wrap ? INIT_STATE : BUSY_STATE; + strm->adler = +#ifdef GZIP + s->wrap == 2 ? crc32(0L, Z_NULL, 0) : +#endif + adler32(0L, Z_NULL, 0); + s->last_flush = Z_NO_FLUSH; + + _tr_init(s); + lm_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetHeader (strm, head) + z_streamp strm; + gz_headerp head; +{ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (strm->state->wrap != 2) return Z_STREAM_ERROR; + strm->state->gzhead = head; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePrime (strm, bits, value) + z_streamp strm; + int bits; + int value; +{ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + strm->state->bi_valid = bits; + strm->state->bi_buf = (ush)(value & ((1 << bits) - 1)); + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(strm, level, strategy) + z_streamp strm; + int level; + int strategy; +{ + deflate_state 
*s; + compress_func func; + int err = Z_OK; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if (func != configuration_table[level].func && strm->total_in != 0) { + /* Flush the last buffer: */ + err = deflate(strm, Z_PARTIAL_FLUSH); + } + if (s->level != level) { + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + s->strategy = strategy; + return err; +} + +/* ========================================================================= */ +int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) + z_streamp strm; + int good_length; + int max_lazy; + int nice_length; + int max_chain; +{ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; + s->good_match = good_length; + s->max_lazy_match = max_lazy; + s->nice_match = nice_length; + s->max_chain_length = max_chain; + return Z_OK; +} + +/* ========================================================================= + * For the default windowBits of 15 and memLevel of 8, this function returns + * a close to exact, as well as small, upper bound on the compressed size. + * They are coded as constants here for a reason--if the #define's are + * changed, then this function needs to be changed as well. The return + * value for 15 and 8 only works for those exact settings. 
+ * + * For any setting other than those defaults for windowBits and memLevel, + * the value returned is a conservative worst case for the maximum expansion + * resulting from using fixed blocks instead of stored blocks, which deflate + * can emit on compressed data for some combinations of the parameters. + * + * This function could be more sophisticated to provide closer upper bounds + * for every combination of windowBits and memLevel, as well as wrap. + * But even the conservative upper bound of about 14% expansion does not + * seem onerous for output buffer allocation. + */ +uLong ZEXPORT deflateBound(strm, sourceLen) + z_streamp strm; + uLong sourceLen; +{ + deflate_state *s; + uLong destLen; + + /* conservative upper bound */ + destLen = sourceLen + + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 11; + + /* if can't get parameters, return conservative bound */ + if (strm == Z_NULL || strm->state == Z_NULL) + return destLen; + + /* if not default parameters, return conservative bound */ + s = strm->state; + if (s->w_bits != 15 || s->hash_bits != 8 + 7) + return destLen; + + /* default settings: return tight bound for that case */ + return compressBound(sourceLen); +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +local void putShortMSB (s, b) + deflate_state *s; + uInt b; +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output goes + * through this function so some applications may wish to modify it + * to avoid allocating a large strm->next_out buffer and copying into it. + * (See also read_buf()). 
+ */ +local void flush_pending(strm) + z_streamp strm; +{ + unsigned len = strm->state->pending; + + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + zmemcpy(strm->next_out, strm->state->pending_out, len); + strm->next_out += len; + strm->state->pending_out += len; + strm->total_out += len; + strm->avail_out -= len; + strm->state->pending -= len; + if (strm->state->pending == 0) { + strm->state->pending_out = strm->state->pending_buf; + } +} + +/* ========================================================================= */ +int ZEXPORT deflate (strm, flush) + z_streamp strm; + int flush; +{ + int old_flush; /* value of flush param for previous deflate call */ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + flush > Z_FINISH || flush < 0) { + return Z_STREAM_ERROR; + } + s = strm->state; + + if (strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0) || + (s->status == FINISH_STATE && flush != Z_FINISH)) { + ERR_RETURN(strm, Z_STREAM_ERROR); + } + if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); + + s->strm = strm; /* just in case */ + old_flush = s->last_flush; + s->last_flush = flush; + + /* Write the header */ + if (s->status == INIT_STATE) { +#ifdef GZIP + if (s->wrap == 2) { + strm->adler = crc32(0L, Z_NULL, 0); + put_byte(s, 31); + put_byte(s, 139); + put_byte(s, 8); + if (s->gzhead == NULL) { + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, OS_CODE); + s->status = BUSY_STATE; + } + else { + put_byte(s, (s->gzhead->text ? 1 : 0) + + (s->gzhead->hcrc ? 2 : 0) + + (s->gzhead->extra == Z_NULL ? 0 : 4) + + (s->gzhead->name == Z_NULL ? 0 : 8) + + (s->gzhead->comment == Z_NULL ? 
0 : 16) + ); + put_byte(s, (Byte)(s->gzhead->time & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, s->gzhead->os & 0xff); + if (s->gzhead->extra != NULL) { + put_byte(s, s->gzhead->extra_len & 0xff); + put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); + } + if (s->gzhead->hcrc) + strm->adler = crc32(strm->adler, s->pending_buf, + s->pending); + s->gzindex = 0; + s->status = EXTRA_STATE; + } + } + else +#endif + { + uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; + uInt level_flags; + + if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) + level_flags = 0; + else if (s->level < 6) + level_flags = 1; + else if (s->level == 6) + level_flags = 2; + else + level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + s->status = BUSY_STATE; + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = adler32(0L, Z_NULL, 0); + } + } +#ifdef GZIP + if (s->status == EXTRA_STATE) { + if (s->gzhead->extra != NULL) { + uInt beg = s->pending; /* start of bytes to update crc */ + + while (s->gzindex < (s->gzhead->extra_len & 0xffff)) { + if (s->pending == s->pending_buf_size) { + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + flush_pending(strm); + beg = s->pending; + if (s->pending == s->pending_buf_size) + break; + } + put_byte(s, s->gzhead->extra[s->gzindex]); + s->gzindex++; + } + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + if (s->gzindex == s->gzhead->extra_len) { + 
s->gzindex = 0; + s->status = NAME_STATE; + } + } + else + s->status = NAME_STATE; + } + if (s->status == NAME_STATE) { + if (s->gzhead->name != NULL) { + uInt beg = s->pending; /* start of bytes to update crc */ + int val; + + do { + if (s->pending == s->pending_buf_size) { + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + flush_pending(strm); + beg = s->pending; + if (s->pending == s->pending_buf_size) { + val = 1; + break; + } + } + val = s->gzhead->name[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + if (val == 0) { + s->gzindex = 0; + s->status = COMMENT_STATE; + } + } + else + s->status = COMMENT_STATE; + } + if (s->status == COMMENT_STATE) { + if (s->gzhead->comment != NULL) { + uInt beg = s->pending; /* start of bytes to update crc */ + int val; + + do { + if (s->pending == s->pending_buf_size) { + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + flush_pending(strm); + beg = s->pending; + if (s->pending == s->pending_buf_size) { + val = 1; + break; + } + } + val = s->gzhead->comment[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + if (val == 0) + s->status = HCRC_STATE; + } + else + s->status = HCRC_STATE; + } + if (s->status == HCRC_STATE) { + if (s->gzhead->hcrc) { + if (s->pending + 2 > s->pending_buf_size) + flush_pending(strm); + if (s->pending + 2 <= s->pending_buf_size) { + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + strm->adler = crc32(0L, Z_NULL, 0); + s->status = BUSY_STATE; + } + } + else + s->status = BUSY_STATE; + } +#endif + + /* Flush as much pending output as possible */ + if (s->pending != 0) 
{ + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and + * avail_in equal to zero. There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } + + /* Make sure there is something to do and avoid duplicate consecutive + * flushes. For repeated and useless calls with Z_FINISH, we keep + * returning Z_STREAM_END instead of Z_BUF_ERROR. + */ + } else if (strm->avail_in == 0 && flush <= old_flush && + flush != Z_FINISH) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* Start a new block or continue the current one. + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + + bstate = (*(configuration_table[s->level].func))(s, flush); + + if (bstate == finish_started || bstate == finish_done) { + s->status = FINISH_STATE; + } + if (bstate == need_more || bstate == finish_started) { + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ + } + return Z_OK; + /* If flush != Z_NO_FLUSH && avail_out == 0, the next call + * of deflate should use the same flush parameter to make sure + * that the flush is complete. So we don't have to output an + * empty block here, this will be done at next call. This also + * ensures that for a very small output buffer, we emit at most + * one empty block. + */ + } + if (bstate == block_done) { + if (flush == Z_PARTIAL_FLUSH) { + _tr_align(s); + } else { /* FULL_FLUSH or SYNC_FLUSH */ + _tr_stored_block(s, (char*)0, 0L, 0); + /* For a full flush, this empty block will be recognized + * as a special marker by inflate_sync(). 
+ */ + if (flush == Z_FULL_FLUSH) { + CLEAR_HASH(s); /* forget history */ + } + } + flush_pending(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; + } + } + } + Assert(strm->avail_out > 0, "bug2"); + + if (flush != Z_FINISH) return Z_OK; + if (s->wrap <= 0) return Z_STREAM_END; + + /* Write the trailer */ +#ifdef GZIP + if (s->wrap == 2) { + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 24) & 0xff)); + put_byte(s, (Byte)(strm->total_in & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 8) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 16) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 24) & 0xff)); + } + else +#endif + { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + flush_pending(strm); + /* If avail_out is zero, the application will call deflate again + * to flush the rest. + */ + if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ + return s->pending != 0 ? Z_OK : Z_STREAM_END; +} + +/* ========================================================================= */ +int ZEXPORT deflateEnd (strm) + z_streamp strm; +{ + int status; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + + status = strm->state->status; + if (status != INIT_STATE && + status != EXTRA_STATE && + status != NAME_STATE && + status != COMMENT_STATE && + status != HCRC_STATE && + status != BUSY_STATE && + status != FINISH_STATE) { + return Z_STREAM_ERROR; + } + + /* Deallocate in reverse order of allocations: */ + TRY_FREE(strm, strm->state->pending_buf); + TRY_FREE(strm, strm->state->head); + TRY_FREE(strm, strm->state->prev); + TRY_FREE(strm, strm->state->window); + + ZFREE(strm, strm->state); + strm->state = Z_NULL; + + return status == BUSY_STATE ? 
Z_DATA_ERROR : Z_OK; +} + +/* ========================================================================= + * Copy the source state to the destination state. + * To simplify the source, this is not supported for 16-bit MSDOS (which + * doesn't have enough memory anyway to duplicate compression states). + */ +int ZEXPORT deflateCopy (dest, source) + z_streamp dest; + z_streamp source; +{ +#ifdef MAXSEG_64K + return Z_STREAM_ERROR; +#else + deflate_state *ds; + deflate_state *ss; + ushf *overlay; + + + if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) { + return Z_STREAM_ERROR; + } + + ss = source->state; + + zmemcpy(dest, source, sizeof(z_stream)); + + ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; + zmemcpy(ds, ss, sizeof(deflate_state)); + ds->strm = dest; + + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); + ds->pending_buf = (uchf *) overlay; + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { + deflateEnd (dest); + return Z_MEM_ERROR; + } + /* following zmemcpy do not work for 16-bit MSDOS */ + zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); + zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); + ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); + ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; + ds->bl_desc.dyn_tree = ds->bl_tree; + + return Z_OK; +#endif /* 
MAXSEG_64K */ +} + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). + */ +local int read_buf(strm, buf, size) + z_streamp strm; + Bytef *buf; + unsigned size; +{ + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, strm->next_in, len); + } +#ifdef GZIP + else if (strm->state->wrap == 2) { + strm->adler = crc32(strm->adler, strm->next_in, len); + } +#endif + zmemcpy(buf, strm->next_in, len); + strm->next_in += len; + strm->total_in += len; + + return (int)len; +} + +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +local void lm_init (s) + deflate_state *s; +{ + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +#ifndef FASTEST +#ifdef ASMV + match_init(); /* initialize the asm code */ +#endif +#endif +} + +#ifndef FASTEST +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. 
Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is + * garbage. + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ +#ifndef ASMV +/* For 80x86 and 680x0, an optimized version will be provided in match.asm or + * match.S. The code will be functionally equivalent. + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + int best_len = s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + Posf *prev = s->prev; + uInt wmask = s->w_mask; + +#ifdef UNALIGNED_OK + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check. + */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; + register ush scan_start = *(ushf*)scan; + register ush scan_end = *(ushf*)(scan+best_len-1); +#else + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + register Byte scan_end1 = scan[best_len-1]; + register Byte scan_end = scan[best_len]; +#endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. 
+ */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* Do not waste too much time if we already have a good match: */ + if (s->prev_length >= s->good_match) { + chain_length >>= 2; + } + /* Do not look for matches beyond the end of the input. This is necessary + * to make deflate deterministic. + */ + if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + do { + Assert(cur_match < s->strstart, "no future"); + match = s->window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2. Note that the checks below + * for insufficient lookahead only occur occasionally for performance + * reasons. Therefore uninitialized memory will be accessed, and + * conditional jumps will be made that depend on those values. + * However the length of the match is limited to the lookahead, so + * the output of deflate is not affected by the uninitialized values. + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ushf*)(match+best_len-1) != scan_end || + *(ushf*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart+3, +5, ... up to strstart+257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. 
+ */ + Assert(scan[2] == match[2], "scan[2]?"); + scan++, match++; + do { + } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window+strstart+257 */ + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend-scan); + scan = strend - (MAX_MATCH-1); + +#else /* UNALIGNED_OK */ + + if (match[best_len] != scan_end || + match[best_len-1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. 
+ */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) break; +#ifdef UNALIGNED_OK + scan_end = *(ushf*)(scan+best_len-1); +#else + scan_end1 = scan[best_len-1]; + scan_end = scan[best_len]; +#endif + } + } while ((cur_match = prev[cur_match & wmask]) > limit + && --chain_length != 0); + + if ((uInt)best_len <= s->lookahead) return (uInt)best_len; + return s->lookahead; +} +#endif /* ASMV */ +#endif /* FASTEST */ + +/* --------------------------------------------------------------------------- + * Optimized version for level == 1 or strategy == Z_RLE only + */ +local uInt longest_match_fast(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + Assert(cur_match < s->strstart, "no future"); + + match = s->window + cur_match; + + /* Return failure if the match length is less than 2: + */ + if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) 
+ * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match += 2; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + + if (len < MIN_MATCH) return MIN_MATCH - 1; + + s->match_start = cur_match; + return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; +} + +#ifdef DEBUG +/* =========================================================================== + * Check that the match at match_start is indeed a match. + */ +local void check_match(s, start, match, length) + deflate_state *s; + IPos start, match; + int length; +{ + /* check that the match is indeed a match */ + if (zmemcmp(s->window + match, + s->window + start, length) != EQUAL) { + fprintf(stderr, " start %u, match %u, length %d\n", + start, match, length); + do { + fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); + } while (--length != 0); + z_error("invalid match"); + } + if (z_verbose > 1) { + fprintf(stderr,"\\[%d,%d]", start-match, length); + do { putc(s->window[start++], stderr); } while (--length != 0); + } +} +#else +# define check_match(s, start, match, length) +#endif /* DEBUG */ + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. 
+ * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +local void fill_window(s) + deflate_state *s; +{ + register unsigned n, m; + register Posf *p; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (sizeof(int) <= 2) { + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if + * strstart == 0 && lookahead == 1 (input done a byte at time) + */ + more--; + } + } + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (s->strstart >= wsize+MAX_DIST(s)) { + + zmemcpy(s->window, s->window+wsize, (unsigned)wsize); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + + /* Slide the hash table (could be avoided with 32 bit values + at the expense of memory usage). We slide even when level == 0 + to keep the hash table consistent if we switch back to level > 0 + later. (Using level 0 permanently is not an optimal usage of + zlib, so we don't care about this pathological case.) + */ + /* %%% avoid this when Z_RLE */ + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + } while (--n); + + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. 
+ */ + } while (--n); +#endif + more += wsize; + } + if (s->strm->avail_in == 0) return; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. + */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead >= MIN_MATCH) { + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); +} + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK_ONLY(s, eof) { \ + _tr_flush_block(s, (s->block_start >= 0L ? \ + (charf *)&s->window[(unsigned)s->block_start] : \ + (charf *)Z_NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (eof)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary. */ +#define FLUSH_BLOCK(s, eof) { \ + FLUSH_BLOCK_ONLY(s, eof); \ + if (s->strm->avail_out == 0) return (eof) ? 
finish_started : need_more; \ +} + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * This function does not insert new strings in the dictionary since + * uncompressible data is probably not useful. This function is used + * only for the level=0 compression option. + * NOTE: this function should be optimized to avoid extra copying from + * window to pending_buf. + */ +local block_state deflate_stored(s, flush) + deflate_state *s; + int flush; +{ + /* Stored blocks are limited to 0xffff bytes, pending_buf is limited + * to pending_buf_size, and each stored block has a 5 byte header: + */ + ulg max_block_size = 0xffff; + ulg max_start; + + if (max_block_size > s->pending_buf_size - 5) { + max_block_size = s->pending_buf_size - 5; + } + + /* Copy as much as possible from input to output: */ + for (;;) { + /* Fill the window as much as possible: */ + if (s->lookahead <= 1) { + + Assert(s->strstart < s->w_size+MAX_DIST(s) || + s->block_start >= (long)s->w_size, "slide too late"); + + fill_window(s); + if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; + + if (s->lookahead == 0) break; /* flush the current block */ + } + Assert(s->block_start >= 0L, "block gone"); + + s->strstart += s->lookahead; + s->lookahead = 0; + + /* Emit a stored block if pending_buf will be full: */ + max_start = s->block_start + max_block_size; + if (s->strstart == 0 || (ulg)s->strstart >= max_start) { + /* strstart == 0 is possible when wraparound on 16-bit machine */ + s->lookahead = (uInt)(s->strstart - max_start); + s->strstart = (uInt)max_start; + FLUSH_BLOCK(s, 0); + } + /* Flush if we may have to slide, otherwise block_start may become + * negative and the data will be gone: + */ + if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { + FLUSH_BLOCK(s, 0); + } + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? 
finish_done : block_done; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. + */ +local block_state deflate_fast(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head = NIL; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). 
+ */ +#ifdef FASTEST + if ((s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) || + (s->strategy == Z_RLE && s->strstart - hash_head == 1)) { + s->match_length = longest_match_fast (s, hash_head); + } +#else + if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) { + s->match_length = longest_match (s, hash_head); + } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) { + s->match_length = longest_match_fast (s, hash_head); + } +#endif + /* longest_match() or longest_match_fast() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + _tr_tally_dist(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ +#ifndef FASTEST + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in table */ + do { + s->strstart++; + INSERT_STRING(s, s->strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. + */ + } while (--s->match_length != 0); + s->strstart++; + } else +#endif + { + s->strstart += s->match_length; + s->match_length = 0; + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? 
finish_done : block_done; +} + +#ifndef FASTEST +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +local block_state deflate_slow(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head = NIL; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). 
+ */ + if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) { + s->match_length = longest_match (s, hash_head); + } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) { + s->match_length = longest_match_fast (s, hash_head); + } + /* longest_match() or longest_match_fast() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED +#if TOO_FAR <= 32767 + || (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR) +#endif + )) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + + _tr_tally_dist(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH, bflush); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. 
+ */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + if (bflush) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + s->match_available = 0; + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} +#endif /* FASTEST */ + +#if 0 +/* =========================================================================== + * For Z_RLE, simply look for runs of bytes, generate matches only of distance + * one. Do not maintain a hash table. (It will be regenerated if this run of + * deflate switches away from Z_RLE.) + */ +local block_state deflate_rle(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + uInt run; /* length of run */ + uInt max; /* maximum length of run */ + uInt prev; /* byte at distance one to match */ + Bytef *scan; /* scan for end of run */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the longest encodable run. + */ + if (s->lookahead < MAX_MATCH) { + fill_window(s); + if (s->lookahead < MAX_MATCH && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* See how many times the previous byte repeats */ + run = 0; + if (s->strstart > 0) { /* if there is a previous byte, that is */ + max = s->lookahead < MAX_MATCH ? 
s->lookahead : MAX_MATCH; + scan = s->window + s->strstart - 1; + prev = *scan++; + do { + if (*scan++ != prev) + break; + } while (++run < max); + } + + /* Emit match if have run of MIN_MATCH or longer, else emit literal */ + if (run >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, run); + _tr_tally_dist(s, 1, run - MIN_MATCH, bflush); + s->lookahead -= run; + s->strstart += run; + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} +#endif Added: external/zlib/deflate.h ============================================================================== --- (empty file) +++ external/zlib/deflate.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,331 @@ +/* deflate.h -- internal compression state + * Copyright (C) 1995-2004 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef DEFLATE_H +#define DEFLATE_H + +#include "zutil.h" + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer creation by deflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip encoding + should be left enabled. */ +#ifndef NO_GZIP +# define GZIP +#endif + +/* =========================================================================== + * Internal compression state. 
+ */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define INIT_STATE 42 +#define EXTRA_STATE 69 +#define NAME_STATE 73 +#define COMMENT_STATE 91 +#define HCRC_STATE 103 +#define BUSY_STATE 113 +#define FINISH_STATE 666 +/* Stream status */ + + +/* Data structure describing a single value and its code string. */ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +typedef struct static_tree_desc_s static_tree_desc; + +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; + +typedef ush Pos; +typedef Pos FAR Posf; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. 
+ */ + +typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + uInt pending; /* nb of bytes in the pending buffer */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + gz_headerp gzhead; /* gzip header information to write */ + uInt gzindex; /* where in extra, name, or comment */ + Byte method; /* STORED (for zip only) or DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Posf *head; /* Heads of the hash chains or NIL. 
*/ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. 
+ */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to supress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + + uchf *l_buf; /* buffer for literals or lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). 
+ * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt last_lit; /* running index in l_buf */ + + ushf *d_buf; + /* Buffer for distances. To simplify the code, d_buf and l_buf have + * the same number of elements. To use different lengths, an extra flag + * array would be necessary. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + int last_eob_len; /* bit length of EOB code for last block */ + +#ifdef DEBUG + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + +} FAR deflate_state; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. 
+ */ + + /* in trees.c */ +void _tr_init OF((deflate_state *s)); +int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, + int eof)); +void _tr_align OF((deflate_state *s)); +void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, + int eof)); + +#define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. _dist_code[256] and _dist_code[257] are never + * used. + */ + +#ifndef DEBUG +/* Inline versions of _tr_tally for speed: */ + +#if defined(GEN_TREES_H) || !defined(STDC) + extern uch _length_code[]; + extern uch _dist_code[]; +#else + extern const uch _length_code[]; + extern const uch _dist_code[]; +#endif + +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->d_buf[s->last_lit] = 0; \ + s->l_buf[s->last_lit++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (length); \ + ush dist = (distance); \ + s->d_buf[s->last_lit] = dist; \ + s->l_buf[s->last_lit++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +#else +# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +# define _tr_tally_dist(s, distance, length, flush) \ + flush = _tr_tally(s, distance, length) +#endif + +#endif /* DEFLATE_H */ Added: external/zlib/example.c ============================================================================== --- (empty file) +++ external/zlib/example.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,565 @@ +/* example.c -- usage example of the zlib compression library + * Copyright (C) 1995-2004 Jean-loup Gailly. 
+ * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include <stdio.h> +#include "zlib.h" + +#ifdef STDC +# include <string.h> +# include <stdlib.h> +#endif + +#if defined(VMS) || defined(RISCOS) +# define TESTFILE "foo-gz" +#else +# define TESTFILE "foo.gz" +#endif + +#define CHECK_ERR(err, msg) { \ + if (err != Z_OK) { \ + fprintf(stderr, "%s error: %d\n", msg, err); \ + exit(1); \ + } \ +} + +const char hello[] = "hello, hello!"; +/* "hello world" would be more standard, but the repeated "hello" + * stresses the compression code better, sorry... + */ + +const char dictionary[] = "hello"; +uLong dictId; /* Adler32 value of the dictionary */ + +void test_compress OF((Byte *compr, uLong comprLen, + Byte *uncompr, uLong uncomprLen)); +void test_gzio OF((const char *fname, + Byte *uncompr, uLong uncomprLen)); +void test_deflate OF((Byte *compr, uLong comprLen)); +void test_inflate OF((Byte *compr, uLong comprLen, + Byte *uncompr, uLong uncomprLen)); +void test_large_deflate OF((Byte *compr, uLong comprLen, + Byte *uncompr, uLong uncomprLen)); +void test_large_inflate OF((Byte *compr, uLong comprLen, + Byte *uncompr, uLong uncomprLen)); +void test_flush OF((Byte *compr, uLong *comprLen)); +void test_sync OF((Byte *compr, uLong comprLen, + Byte *uncompr, uLong uncomprLen)); +void test_dict_deflate OF((Byte *compr, uLong comprLen)); +void test_dict_inflate OF((Byte *compr, uLong comprLen, + Byte *uncompr, uLong uncomprLen)); +int main OF((int argc, char *argv[])); + +/* =========================================================================== + * Test compress() and uncompress() + */ +void test_compress(compr, comprLen, uncompr, uncomprLen) + Byte *compr, *uncompr; + uLong comprLen, uncomprLen; +{ + int err; + uLong len = (uLong)strlen(hello)+1; + + err = compress(compr, &comprLen, (const Bytef*)hello, len); + CHECK_ERR(err, "compress"); + + strcpy((char*)uncompr, "garbage"); + + err = uncompress(uncompr, &uncomprLen, compr, comprLen); + 
CHECK_ERR(err, "uncompress"); + + if (strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad uncompress\n"); + exit(1); + } else { + printf("uncompress(): %s\n", (char *)uncompr); + } +} + +/* =========================================================================== + * Test read/write of .gz files + */ +void test_gzio(fname, uncompr, uncomprLen) + const char *fname; /* compressed file name */ + Byte *uncompr; + uLong uncomprLen; +{ +#ifdef NO_GZCOMPRESS + fprintf(stderr, "NO_GZCOMPRESS -- gz* functions cannot compress\n"); +#else + int err; + int len = (int)strlen(hello)+1; + gzFile file; + z_off_t pos; + + file = gzopen(fname, "wb"); + if (file == NULL) { + fprintf(stderr, "gzopen error\n"); + exit(1); + } + gzputc(file, 'h'); + if (gzputs(file, "ello") != 4) { + fprintf(stderr, "gzputs err: %s\n", gzerror(file, &err)); + exit(1); + } + if (gzprintf(file, ", %s!", "hello") != 8) { + fprintf(stderr, "gzprintf err: %s\n", gzerror(file, &err)); + exit(1); + } + gzseek(file, 1L, SEEK_CUR); /* add one zero byte */ + gzclose(file); + + file = gzopen(fname, "rb"); + if (file == NULL) { + fprintf(stderr, "gzopen error\n"); + exit(1); + } + strcpy((char*)uncompr, "garbage"); + + if (gzread(file, uncompr, (unsigned)uncomprLen) != len) { + fprintf(stderr, "gzread err: %s\n", gzerror(file, &err)); + exit(1); + } + if (strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad gzread: %s\n", (char*)uncompr); + exit(1); + } else { + printf("gzread(): %s\n", (char*)uncompr); + } + + pos = gzseek(file, -8L, SEEK_CUR); + if (pos != 6 || gztell(file) != pos) { + fprintf(stderr, "gzseek error, pos=%ld, gztell=%ld\n", + (long)pos, (long)gztell(file)); + exit(1); + } + + if (gzgetc(file) != ' ') { + fprintf(stderr, "gzgetc error\n"); + exit(1); + } + + if (gzungetc(' ', file) != ' ') { + fprintf(stderr, "gzungetc error\n"); + exit(1); + } + + gzgets(file, (char*)uncompr, (int)uncomprLen); + if (strlen((char*)uncompr) != 7) { /* " hello!" 
*/ + fprintf(stderr, "gzgets err after gzseek: %s\n", gzerror(file, &err)); + exit(1); + } + if (strcmp((char*)uncompr, hello + 6)) { + fprintf(stderr, "bad gzgets after gzseek\n"); + exit(1); + } else { + printf("gzgets() after gzseek: %s\n", (char*)uncompr); + } + + gzclose(file); +#endif +} + +/* =========================================================================== + * Test deflate() with small buffers + */ +void test_deflate(compr, comprLen) + Byte *compr; + uLong comprLen; +{ + z_stream c_stream; /* compression stream */ + int err; + uLong len = (uLong)strlen(hello)+1; + + c_stream.zalloc = (alloc_func)0; + c_stream.zfree = (free_func)0; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); + CHECK_ERR(err, "deflateInit"); + + c_stream.next_in = (Bytef*)hello; + c_stream.next_out = compr; + + while (c_stream.total_in != len && c_stream.total_out < comprLen) { + c_stream.avail_in = c_stream.avail_out = 1; /* force small buffers */ + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + } + /* Finish the stream, still forcing small buffers: */ + for (;;) { + c_stream.avail_out = 1; + err = deflate(&c_stream, Z_FINISH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "deflate"); + } + + err = deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); +} + +/* =========================================================================== + * Test inflate() with small buffers + */ +void test_inflate(compr, comprLen, uncompr, uncomprLen) + Byte *compr, *uncompr; + uLong comprLen, uncomprLen; +{ + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = (alloc_func)0; + d_stream.zfree = (free_func)0; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = 0; + d_stream.next_out = uncompr; + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + while (d_stream.total_out < uncomprLen && d_stream.total_in < comprLen) { 
+ d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */ + err = inflate(&d_stream, Z_NO_FLUSH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "inflate"); + } + + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + if (strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad inflate\n"); + exit(1); + } else { + printf("inflate(): %s\n", (char *)uncompr); + } +} + +/* =========================================================================== + * Test deflate() with large buffers and dynamic change of compression level + */ +void test_large_deflate(compr, comprLen, uncompr, uncomprLen) + Byte *compr, *uncompr; + uLong comprLen, uncomprLen; +{ + z_stream c_stream; /* compression stream */ + int err; + + c_stream.zalloc = (alloc_func)0; + c_stream.zfree = (free_func)0; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_BEST_SPEED); + CHECK_ERR(err, "deflateInit"); + + c_stream.next_out = compr; + c_stream.avail_out = (uInt)comprLen; + + /* At this point, uncompr is still mostly zeroes, so it should compress + * very well: + */ + c_stream.next_in = uncompr; + c_stream.avail_in = (uInt)uncomprLen; + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + if (c_stream.avail_in != 0) { + fprintf(stderr, "deflate not greedy\n"); + exit(1); + } + + /* Feed in already compressed data and switch to no compression: */ + deflateParams(&c_stream, Z_NO_COMPRESSION, Z_DEFAULT_STRATEGY); + c_stream.next_in = compr; + c_stream.avail_in = (uInt)comprLen/2; + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + + /* Switch back to compressing mode: */ + deflateParams(&c_stream, Z_BEST_COMPRESSION, Z_FILTERED); + c_stream.next_in = uncompr; + c_stream.avail_in = (uInt)uncomprLen; + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + + err = deflate(&c_stream, Z_FINISH); + if (err != Z_STREAM_END) { + fprintf(stderr, "deflate should report Z_STREAM_END\n"); + exit(1); + } + err = 
deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); +} + +/* =========================================================================== + * Test inflate() with large buffers + */ +void test_large_inflate(compr, comprLen, uncompr, uncomprLen) + Byte *compr, *uncompr; + uLong comprLen, uncomprLen; +{ + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = (alloc_func)0; + d_stream.zfree = (free_func)0; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = (uInt)comprLen; + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + for (;;) { + d_stream.next_out = uncompr; /* discard the output */ + d_stream.avail_out = (uInt)uncomprLen; + err = inflate(&d_stream, Z_NO_FLUSH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "large inflate"); + } + + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + if (d_stream.total_out != 2*uncomprLen + comprLen/2) { + fprintf(stderr, "bad large inflate: %ld\n", d_stream.total_out); + exit(1); + } else { + printf("large_inflate(): OK\n"); + } +} + +/* =========================================================================== + * Test deflate() with full flush + */ +void test_flush(compr, comprLen) + Byte *compr; + uLong *comprLen; +{ + z_stream c_stream; /* compression stream */ + int err; + uInt len = (uInt)strlen(hello)+1; + + c_stream.zalloc = (alloc_func)0; + c_stream.zfree = (free_func)0; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); + CHECK_ERR(err, "deflateInit"); + + c_stream.next_in = (Bytef*)hello; + c_stream.next_out = compr; + c_stream.avail_in = 3; + c_stream.avail_out = (uInt)*comprLen; + err = deflate(&c_stream, Z_FULL_FLUSH); + CHECK_ERR(err, "deflate"); + + compr[3]++; /* force an error in first compressed block */ + c_stream.avail_in = len - 3; + + err = deflate(&c_stream, Z_FINISH); + if (err != Z_STREAM_END) { + CHECK_ERR(err, "deflate"); + } + err 
= deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); + + *comprLen = c_stream.total_out; +} + +/* =========================================================================== + * Test inflateSync() + */ +void test_sync(compr, comprLen, uncompr, uncomprLen) + Byte *compr, *uncompr; + uLong comprLen, uncomprLen; +{ + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = (alloc_func)0; + d_stream.zfree = (free_func)0; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = 2; /* just read the zlib header */ + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + d_stream.next_out = uncompr; + d_stream.avail_out = (uInt)uncomprLen; + + inflate(&d_stream, Z_NO_FLUSH); + CHECK_ERR(err, "inflate"); + + d_stream.avail_in = (uInt)comprLen-2; /* read all compressed data */ + err = inflateSync(&d_stream); /* but skip the damaged part */ + CHECK_ERR(err, "inflateSync"); + + err = inflate(&d_stream, Z_FINISH); + if (err != Z_DATA_ERROR) { + fprintf(stderr, "inflate should report DATA_ERROR\n"); + /* Because of incorrect adler32 */ + exit(1); + } + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + printf("after inflateSync(): hel%s\n", (char *)uncompr); +} + +/* =========================================================================== + * Test deflate() with preset dictionary + */ +void test_dict_deflate(compr, comprLen) + Byte *compr; + uLong comprLen; +{ + z_stream c_stream; /* compression stream */ + int err; + + c_stream.zalloc = (alloc_func)0; + c_stream.zfree = (free_func)0; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_BEST_COMPRESSION); + CHECK_ERR(err, "deflateInit"); + + err = deflateSetDictionary(&c_stream, + (const Bytef*)dictionary, sizeof(dictionary)); + CHECK_ERR(err, "deflateSetDictionary"); + + dictId = c_stream.adler; + c_stream.next_out = compr; + c_stream.avail_out = (uInt)comprLen; + + c_stream.next_in = 
(Bytef*)hello; + c_stream.avail_in = (uInt)strlen(hello)+1; + + err = deflate(&c_stream, Z_FINISH); + if (err != Z_STREAM_END) { + fprintf(stderr, "deflate should report Z_STREAM_END\n"); + exit(1); + } + err = deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); +} + +/* =========================================================================== + * Test inflate() with a preset dictionary + */ +void test_dict_inflate(compr, comprLen, uncompr, uncomprLen) + Byte *compr, *uncompr; + uLong comprLen, uncomprLen; +{ + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = (alloc_func)0; + d_stream.zfree = (free_func)0; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = (uInt)comprLen; + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + d_stream.next_out = uncompr; + d_stream.avail_out = (uInt)uncomprLen; + + for (;;) { + err = inflate(&d_stream, Z_NO_FLUSH); + if (err == Z_STREAM_END) break; + if (err == Z_NEED_DICT) { + if (d_stream.adler != dictId) { + fprintf(stderr, "unexpected dictionary"); + exit(1); + } + err = inflateSetDictionary(&d_stream, (const Bytef*)dictionary, + sizeof(dictionary)); + } + CHECK_ERR(err, "inflate with dict"); + } + + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + if (strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad inflate with dict\n"); + exit(1); + } else { + printf("inflate with dictionary: %s\n", (char *)uncompr); + } +} + +/* =========================================================================== + * Usage: example [output.gz [input.gz]] + */ + +int main(argc, argv) + int argc; + char *argv[]; +{ + Byte *compr, *uncompr; + uLong comprLen = 10000*sizeof(int); /* don't overflow on MSDOS */ + uLong uncomprLen = comprLen; + static const char* myVersion = ZLIB_VERSION; + + if (zlibVersion()[0] != myVersion[0]) { + fprintf(stderr, "incompatible zlib version\n"); + exit(1); + + } else if 
(strcmp(zlibVersion(), ZLIB_VERSION) != 0) { + fprintf(stderr, "warning: different zlib version\n"); + } + + printf("zlib version %s = 0x%04x, compile flags = 0x%lx\n", + ZLIB_VERSION, ZLIB_VERNUM, zlibCompileFlags()); + + compr = (Byte*)calloc((uInt)comprLen, 1); + uncompr = (Byte*)calloc((uInt)uncomprLen, 1); + /* compr and uncompr are cleared to avoid reading uninitialized + * data and to ensure that uncompr compresses well. + */ + if (compr == Z_NULL || uncompr == Z_NULL) { + printf("out of memory\n"); + exit(1); + } + test_compress(compr, comprLen, uncompr, uncomprLen); + + test_gzio((argc > 1 ? argv[1] : TESTFILE), + uncompr, uncomprLen); + + test_deflate(compr, comprLen); + test_inflate(compr, comprLen, uncompr, uncomprLen); + + test_large_deflate(compr, comprLen, uncompr, uncomprLen); + test_large_inflate(compr, comprLen, uncompr, uncomprLen); + + test_flush(compr, &comprLen); + test_sync(compr, comprLen, uncompr, uncomprLen); + comprLen = uncomprLen; + + test_dict_deflate(compr, comprLen); + test_dict_inflate(compr, comprLen, uncompr, uncomprLen); + + free(compr); + free(uncompr); + + return 0; +} Added: external/zlib/examples/README.examples ============================================================================== --- (empty file) +++ external/zlib/examples/README.examples Tue Jan 3 07:42:59 2006 @@ -0,0 +1,42 @@ +This directory contains examples of the use of zlib. 
+ +fitblk.c + compress just enough input to nearly fill a requested output size + - zlib isn't designed to do this, but fitblk does it anyway + +gun.c + uncompress a gzip file + - illustrates the use of inflateBack() for high speed file-to-file + decompression using call-back functions + - is approximately twice as fast as gzip -d + - also provides Unix uncompress functionality, again twice as fast + +gzappend.c + append to a gzip file + - illustrates the use of the Z_BLOCK flush parameter for inflate() + - illustrates the use of deflatePrime() to start at any bit + +gzjoin.c + join gzip files without recalculating the crc or recompressing + - illustrates the use of the Z_BLOCK flush parameter for inflate() + - illustrates the use of crc32_combine() + +gzlog.c +gzlog.h + efficiently maintain a message log file in gzip format + - illustrates use of raw deflate and Z_SYNC_FLUSH + - illustrates use of gzip header extra field + +zlib_how.html + painfully comprehensive description of zpipe.c (see below) + - describes in excruciating detail the use of deflate() and inflate() + +zpipe.c + reads and writes zlib streams from stdin to stdout + - illustrates the proper use of deflate() and inflate() + - deeply commented in zlib_how.html (see above) + +zran.c + index a zlib or gzip stream and randomly access it + - illustrates the use of Z_BLOCK, inflatePrime(), and + inflateSetDictionary() to provide random access Added: external/zlib/examples/fitblk.c ============================================================================== --- (empty file) +++ external/zlib/examples/fitblk.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,233 @@ +/* fitblk.c: example of fitting compressed output to a specified size + Not copyrighted -- provided to the public domain + Version 1.1 25 November 2004 Mark Adler */ + +/* Version history: + 1.0 24 Nov 2004 First version + 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() + Use fixed-size, stack-allocated raw buffers + Simplify code moving compression 
to subroutines + Use assert() for internal errors + Add detailed description of approach + */ + +/* Approach to just fitting a requested compressed size: + + fitblk performs three compression passes on a portion of the input + data in order to determine how much of that input will compress to + nearly the requested output block size. The first pass generates + enough deflate blocks to produce output to fill the requested + output size plus a specified excess amount (see the EXCESS define + below). The last deflate block may go quite a bit past that, but + is discarded. The second pass decompresses and recompresses just + the compressed data that fit in the requested plus excess sized + buffer. The deflate process is terminated after that amount of + input, which is less than the amount consumed on the first pass. + The last deflate block of the result will be of a comparable size + to the final product, so that the header for that deflate block and + the compression ratio for that block will be about the same as in + the final product. The third compression pass decompresses the + result of the second step, but only the compressed data up to the + requested size minus an amount to allow the compressed stream to + complete (see the MARGIN define below). That will result in a + final compressed stream whose length is less than or equal to the + requested size. Assuming sufficient input and a requested size + greater than a few hundred bytes, the shortfall will typically be + less than ten bytes. + + If the input is short enough that the first compression completes + before filling the requested output size, then that compressed + stream is returned with no recompression. + + EXCESS is chosen to be just greater than the shortfall seen in a + two pass approach similar to the above. That shortfall is due to + the last deflate block compressing more efficiently with a smaller + header on the second pass.
EXCESS is set to be large enough so + that there is enough uncompressed data for the second pass to fill + out the requested size, and small enough so that the final deflate + block of the second pass will be close in size to the final deflate + block of the third and final pass. MARGIN is chosen to be just + large enough to assure that the final compression has enough room + to complete in all cases. + */ + +#include +#include +#include +#include "zlib.h" + +#define local static + +/* print nastygram and leave */ +local void quit(char *why) +{ + fprintf(stderr, "fitblk abort: %s\n", why); + exit(1); +} + +#define RAWLEN 4096 /* intermediate uncompressed buffer size */ + +/* compress from file to def until provided buffer is full or end of + input reached; return last deflate() return value, or Z_ERRNO if + there was read error on the file */ +local int partcompress(FILE *in, z_streamp def) +{ + int ret, flush; + unsigned char raw[RAWLEN]; + + flush = Z_NO_FLUSH; + do { + def->avail_in = fread(raw, 1, RAWLEN, in); + if (ferror(in)) + return Z_ERRNO; + def->next_in = raw; + if (feof(in)) + flush = Z_FINISH; + ret = deflate(def, flush); + assert(ret != Z_STREAM_ERROR); + } while (def->avail_out != 0 && flush == Z_NO_FLUSH); + return ret; +} + +/* recompress from inf's input to def's output; the input for inf and + the output for def are set in those structures before calling; + return last deflate() return value, or Z_MEM_ERROR if inflate() + was not able to allocate enough memory when it needed to */ +local int recompress(z_streamp inf, z_streamp def) +{ + int ret, flush; + unsigned char raw[RAWLEN]; + + flush = Z_NO_FLUSH; + do { + /* decompress */ + inf->avail_out = RAWLEN; + inf->next_out = raw; + ret = inflate(inf, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && + ret != Z_NEED_DICT); + if (ret == Z_MEM_ERROR) + return ret; + + /* compress what was decompresed until done or no room */ + def->avail_in = RAWLEN - inf->avail_out; + 
def->next_in = raw; + if (inf->avail_out != 0) + flush = Z_FINISH; + ret = deflate(def, flush); + assert(ret != Z_STREAM_ERROR); + } while (ret != Z_STREAM_END && def->avail_out != 0); + return ret; +} + +#define EXCESS 256 /* empirically determined stream overage */ +#define MARGIN 8 /* amount to back off for completion */ + +/* compress from stdin to fixed-size block on stdout */ +int main(int argc, char **argv) +{ + int ret; /* return code */ + unsigned size; /* requested fixed output block size */ + unsigned have; /* bytes written by deflate() call */ + unsigned char *blk; /* intermediate and final stream */ + unsigned char *tmp; /* close to desired size stream */ + z_stream def, inf; /* zlib deflate and inflate states */ + + /* get requested output size */ + if (argc != 2) + quit("need one argument: size of output block"); + ret = strtol(argv[1], argv + 1, 10); + if (argv[1][0] != 0) + quit("argument must be a number"); + if (ret < 8) /* 8 is minimum zlib stream size */ + quit("need positive size of 8 or greater"); + size = (unsigned)ret; + + /* allocate memory for buffers and compression engine */ + blk = malloc(size + EXCESS); + def.zalloc = Z_NULL; + def.zfree = Z_NULL; + def.opaque = Z_NULL; + ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK || blk == NULL) + quit("out of memory"); + + /* compress from stdin until output full, or no more input */ + def.avail_out = size + EXCESS; + def.next_out = blk; + ret = partcompress(stdin, &def); + if (ret == Z_ERRNO) + quit("error reading input"); + + /* if it all fit, then size was undersubscribed -- done! 
*/ + if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { + /* write block to stdout */ + have = size + EXCESS - def.avail_out; + if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) + quit("error writing output"); + + /* clean up and print results to stderr */ + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (all input)\n", + size - have, size); + return 0; + } + + /* it didn't all fit -- set up for recompression */ + inf.zalloc = Z_NULL; + inf.zfree = Z_NULL; + inf.opaque = Z_NULL; + inf.avail_in = 0; + inf.next_in = Z_NULL; + ret = inflateInit(&inf); + tmp = malloc(size + EXCESS); + if (ret != Z_OK || tmp == NULL) + quit("out of memory"); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do first recompression close to the right amount */ + inf.avail_in = size + EXCESS; + inf.next_in = blk; + def.avail_out = size + EXCESS; + def.next_out = tmp; + ret = recompress(&inf, &def); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + + /* set up for next reocmpression */ + ret = inflateReset(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do second and final recompression (third compression) */ + inf.avail_in = size - MARGIN; /* assure stream will complete */ + inf.next_in = tmp; + def.avail_out = size; + def.next_out = blk; + ret = recompress(&inf, &def); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ + + /* done -- write block to stdout */ + have = size - def.avail_out; + if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) + quit("error writing output"); + + /* clean up and print results to stderr */ + free(tmp); + ret = inflateEnd(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (%lu input)\n", + size - have, 
size, def.total_in); + return 0; +} Added: external/zlib/examples/gun.c ============================================================================== --- (empty file) +++ external/zlib/examples/gun.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,693 @@ +/* gun.c -- simple gunzip to give an example of the use of inflateBack() + * Copyright (C) 2003, 2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + Version 1.3 12 June 2005 Mark Adler */ + +/* Version history: + 1.0 16 Feb 2003 First version for testing of inflateBack() + 1.1 21 Feb 2005 Decompress concatenated gzip streams + Remove use of "this" variable (C++ keyword) + Fix return value for in() + Improve allocation failure checking + Add typecasting for void * structures + Add -h option for command version and usage + Add a bunch of comments + 1.2 20 Mar 2005 Add Unix compress (LZW) decompression + Copy file attributes from input file to output file + 1.3 12 Jun 2005 Add casts for error messages [Oberhumer] + */ + +/* + gun [ -t ] [ name ... ] + + decompresses the data in the named gzip files. If no arguments are given, + gun will decompress from stdin to stdout. The names must end in .gz, -gz, + .z, -z, _z, or .Z. The uncompressed data will be written to a file name + with the suffix stripped. On success, the original file is deleted. On + failure, the output file is deleted. For most failures, the command will + continue to process the remaining names on the command line. A memory + allocation failure will abort the command. If -t is specified, then the + listed files or stdin will be tested as gzip files for integrity (without + checking for a proper suffix), no output will be written, and no files + will be deleted. + + Like gzip, gun allows concatenated gzip streams and will decompress them, + writing all of the uncompressed data to the output. Unlike gzip, gun allows + an empty file on input, and will produce no error writing an empty output + file. 
+ + gun will also decompress files made by Unix compress, which uses LZW + compression. These files are automatically detected by virtue of their + magic header bytes. Since the end of a Unix compress stream is marked by the + end-of-file, they cannot be concatenated. If a Unix compress stream is + encountered in an input file, it is the last stream in that file. + + Like gunzip and uncompress, the file attributes of the original compressed + file are maintained in the final uncompressed file, to the extent that the + user permissions allow it. + + On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version + 1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the + LZW decompression provided by gun is about twice as fast as the standard + Unix uncompress command. + */ + +/* external functions and related types and constants */ +#include /* fprintf() */ +#include /* malloc(), free() */ +#include /* strerror(), strcmp(), strlen(), memcpy() */ +#include /* errno */ +#include /* open() */ +#include /* read(), write(), close(), chown(), unlink() */ +#include +#include /* stat(), chmod() */ +#include /* utime() */ +#include "zlib.h" /* inflateBackInit(), inflateBack(), */ + /* inflateBackEnd(), crc32() */ + +/* function declaration */ +#define local static + +/* buffer constants */ +#define SIZE 32768U /* input and output buffer sizes */ +#define PIECE 16384 /* limits i/o chunks for 16-bit int case */ + +/* structure for infback() to pass to input function in() -- it maintains the + input file and a buffer of size SIZE */ +struct ind { + int infile; + unsigned char *inbuf; +}; + +/* Load input buffer, assumed to be empty, and return bytes loaded and a + pointer to them. read() is called until the buffer is full, or until it + returns end-of-file or error. Return 0 on error.
*/ +local unsigned in(void *in_desc, unsigned char **buf) +{ + int ret; + unsigned len; + unsigned char *next; + struct ind *me = (struct ind *)in_desc; + + next = me->inbuf; + *buf = next; + len = 0; + do { + ret = PIECE; + if ((unsigned)ret > SIZE - len) + ret = (int)(SIZE - len); + ret = (int)read(me->infile, next, ret); + if (ret == -1) { + len = 0; + break; + } + next += ret; + len += ret; + } while (ret != 0 && len < SIZE); + return len; +} + +/* structure for infback() to pass to output function out() -- it maintains the + output file, a running CRC-32 check on the output and the total number of + bytes output, both for checking against the gzip trailer. (The length in + the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and + the output is greater than 4 GB.) */ +struct outd { + int outfile; + int check; /* true if checking crc and total */ + unsigned long crc; + unsigned long total; +}; + +/* Write output buffer and update the CRC-32 and total bytes written. write() + is called until all of the output is written or an error is encountered. + On success out() returns 0. For a write failure, out() returns 1. If the + output file descriptor is -1, then nothing is written. + */ +local int out(void *out_desc, unsigned char *buf, unsigned len) +{ + int ret; + struct outd *me = (struct outd *)out_desc; + + if (me->check) { + me->crc = crc32(me->crc, buf, len); + me->total += len; + } + if (me->outfile != -1) + do { + ret = PIECE; + if ((unsigned)ret > len) + ret = (int)len; + ret = (int)write(me->outfile, buf, ret); + if (ret == -1) + return 1; + buf += ret; + len -= ret; + } while (len != 0); + return 0; +} + +/* next input byte macro for use inside lunpipe() and gunpipe() */ +#define NEXT() (have ? 0 : (have = in(indp, &next)), \ + last = have ? 
(have--, (int)(*next++)) : -1) + +/* memory for gunpipe() and lunpipe() -- + the first 256 entries of prefix[] and suffix[] are never used, could + have offset the index, but it's faster to waste the memory */ +unsigned char inbuf[SIZE]; /* input buffer */ +unsigned char outbuf[SIZE]; /* output buffer */ +unsigned short prefix[65536]; /* index to LZW prefix string */ +unsigned char suffix[65536]; /* one-character LZW suffix */ +unsigned char match[65280 + 2]; /* buffer for reversed match or gzip + 32K sliding window */ + +/* throw out what's left in the current bits byte buffer (this is a vestigial + aspect of the compressed data format derived from an implementation that + made use of a special VAX machine instruction!) */ +#define FLUSHCODE() \ + do { \ + left = 0; \ + rem = 0; \ + if (chunk > have) { \ + chunk -= have; \ + have = 0; \ + if (NEXT() == -1) \ + break; \ + chunk--; \ + if (chunk > have) { \ + chunk = have = 0; \ + break; \ + } \ + } \ + have -= chunk; \ + next += chunk; \ + chunk = 0; \ + } while (0) + +/* Decompress a compress (LZW) file from indp to outfile. The compress magic + header (two bytes) has already been read and verified. There are have bytes + of buffered input at next. strm is used for passing error information back + to gunpipe(). + + lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of + file, read error, or write error (a write error indicated by strm->next_in + not equal to Z_NULL), or Z_DATA_ERROR for invalid input. 
+ */ +local int lunpipe(unsigned have, unsigned char *next, struct ind *indp, + int outfile, z_stream *strm) +{ + int last; /* last byte read by NEXT(), or -1 if EOF */ + int chunk; /* bytes left in current chunk */ + int left; /* bits left in rem */ + unsigned rem; /* unused bits from input */ + int bits; /* current bits per code */ + unsigned code; /* code, table traversal index */ + unsigned mask; /* mask for current bits codes */ + int max; /* maximum bits per code for this stream */ + int flags; /* compress flags, then block compress flag */ + unsigned end; /* last valid entry in prefix/suffix tables */ + unsigned temp; /* current code */ + unsigned prev; /* previous code */ + unsigned final; /* last character written for previous code */ + unsigned stack; /* next position for reversed string */ + unsigned outcnt; /* bytes in output buffer */ + struct outd outd; /* output structure */ + + /* set up output */ + outd.outfile = outfile; + outd.check = 0; + + /* process remainder of compress header -- a flags byte */ + flags = NEXT(); + if (last == -1) + return Z_BUF_ERROR; + if (flags & 0x60) { + strm->msg = (char *)"unknown lzw flags set"; + return Z_DATA_ERROR; + } + max = flags & 0x1f; + if (max < 9 || max > 16) { + strm->msg = (char *)"lzw bits out of range"; + return Z_DATA_ERROR; + } + if (max == 9) /* 9 doesn't really mean 9 */ + max = 10; + flags &= 0x80; /* true if block compress */ + + /* clear table */ + bits = 9; + mask = 0x1ff; + end = flags ? 
256 : 255; + + /* set up: get first 9-bit code, which is the first decompressed byte, but + don't create a table entry until the next code */ + if (NEXT() == -1) /* no compressed data is ok */ + return Z_OK; + final = prev = (unsigned)last; /* low 8 bits of code */ + if (NEXT() == -1) /* missing a bit */ + return Z_BUF_ERROR; + if (last & 1) { /* code must be < 256 */ + strm->msg = (char *)"invalid lzw code"; + return Z_DATA_ERROR; + } + rem = (unsigned)last >> 1; /* remaining 7 bits */ + left = 7; + chunk = bits - 2; /* 7 bytes left in this chunk */ + outbuf[0] = (unsigned char)final; /* write first decompressed byte */ + outcnt = 1; + + /* decode codes */ + stack = 0; + for (;;) { + /* if the table will be full after this, increment the code size */ + if (end >= mask && bits < max) { + FLUSHCODE(); + bits++; + mask <<= 1; + mask++; + } + + /* get a code of length bits */ + if (chunk == 0) /* decrement chunk modulo bits */ + chunk = bits; + code = rem; /* low bits of code */ + if (NEXT() == -1) { /* EOF is end of compressed data */ + /* write remaining buffered output */ + if (outcnt && out(&outd, outbuf, outcnt)) { + strm->next_in = outbuf; /* signal write error */ + return Z_BUF_ERROR; + } + return Z_OK; + } + code += (unsigned)last << left; /* middle (or high) bits of code */ + left += 8; + chunk--; + if (bits > left) { /* need more bits */ + if (NEXT() == -1) /* can't end in middle of code */ + return Z_BUF_ERROR; + code += (unsigned)last << left; /* high bits of code */ + left += 8; + chunk--; + } + code &= mask; /* mask to current code length */ + left -= bits; /* number of unused bits */ + rem = (unsigned)last >> (8 - left); /* unused bits from last byte */ + + /* process clear code (256) */ + if (code == 256 && flags) { + FLUSHCODE(); + bits = 9; /* initialize bits and mask */ + mask = 0x1ff; + end = 255; /* empty table */ + continue; /* get next code */ + } + + /* special code to reuse last match */ + temp = code; /* save the current code */ + if (code > 
end) { + /* Be picky on the allowed code here, and make sure that the code + we drop through (prev) will be a valid index so that random + input does not cause an exception. The code != end + 1 check is + empirically derived, and not checked in the original uncompress + code. If this ever causes a problem, that check could be safely + removed. Leaving this check in greatly improves gun's ability + to detect random or corrupted input after a compress header. + In any case, the prev > end check must be retained. */ + if (code != end + 1 || prev > end) { + strm->msg = (char *)"invalid lzw code"; + return Z_DATA_ERROR; + } + match[stack++] = (unsigned char)final; + code = prev; + } + + /* walk through linked list to generate output in reverse order */ + while (code >= 256) { + match[stack++] = suffix[code]; + code = prefix[code]; + } + match[stack++] = (unsigned char)code; + final = code; + + /* link new table entry */ + if (end < mask) { + end++; + prefix[end] = (unsigned short)prev; + suffix[end] = (unsigned char)final; + } + + /* set previous code for next iteration */ + prev = temp; + + /* write output in forward order */ + while (stack > SIZE - outcnt) { + while (outcnt < SIZE) + outbuf[outcnt++] = match[--stack]; + if (out(&outd, outbuf, outcnt)) { + strm->next_in = outbuf; /* signal write error */ + return Z_BUF_ERROR; + } + outcnt = 0; + } + do { + outbuf[outcnt++] = match[--stack]; + } while (stack); + + /* loop for next code with final and prev as the last match, rem and + left provide the first 0..7 bits of the next code, end is the last + valid table entry */ + } +} + +/* Decompress a gzip file from infile to outfile. strm is assumed to have been + successfully initialized with inflateBackInit(). The input file may consist + of a series of gzip streams, in which case all of them will be decompressed + to the output file. If outfile is -1, then the gzip stream(s) integrity is + checked and nothing is written. 
+ + The return value is a zlib error code: Z_MEM_ERROR if out of memory, + Z_DATA_ERROR if the header or the compressed data is invalid, or if the + trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends + prematurely or a write error occurs, or Z_ERRNO if junk (not another gzip + stream) follows a valid gzip stream. + */ +local int gunpipe(z_stream *strm, int infile, int outfile) +{ + int ret, first, last; + unsigned have, flags, len; + unsigned char *next; + struct ind ind, *indp; + struct outd outd; + + /* setup input buffer */ + ind.infile = infile; + ind.inbuf = inbuf; + indp = &ind; + + /* decompress concatenated gzip streams */ + have = 0; /* no input data read in yet */ + first = 1; /* looking for first gzip header */ + strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */ + for (;;) { + /* look for the two magic header bytes for a gzip stream */ + if (NEXT() == -1) { + ret = Z_OK; + break; /* empty gzip stream is ok */ + } + if (last != 31 || (NEXT() != 139 && last != 157)) { + strm->msg = (char *)"incorrect header check"; + ret = first ?
Z_DATA_ERROR : Z_ERRNO; + break; /* not a gzip or compress header */ + } + first = 0; /* next non-header is junk */ + + /* process a compress (LZW) file -- can't be concatenated after this */ + if (last == 157) { + ret = lunpipe(have, next, indp, outfile, strm); + break; + } + + /* process remainder of gzip header */ + ret = Z_BUF_ERROR; + if (NEXT() != 8) { /* only deflate method allowed */ + if (last == -1) break; + strm->msg = (char *)"unknown compression method"; + ret = Z_DATA_ERROR; + break; + } + flags = NEXT(); /* header flags */ + NEXT(); /* discard mod time, xflgs, os */ + NEXT(); + NEXT(); + NEXT(); + NEXT(); + NEXT(); + if (last == -1) break; + if (flags & 0xe0) { + strm->msg = (char *)"unknown header flags set"; + ret = Z_DATA_ERROR; + break; + } + if (flags & 4) { /* extra field */ + len = NEXT(); + len += (unsigned)(NEXT()) << 8; + if (last == -1) break; + while (len > have) { + len -= have; + have = 0; + if (NEXT() == -1) break; + len--; + } + if (last == -1) break; + have -= len; + next += len; + } + if (flags & 8) /* file name */ + while (NEXT() != 0 && last != -1) + ; + if (flags & 16) /* comment */ + while (NEXT() != 0 && last != -1) + ; + if (flags & 2) { /* header crc */ + NEXT(); + NEXT(); + } + if (last == -1) break; + + /* set up output */ + outd.outfile = outfile; + outd.check = 1; + outd.crc = crc32(0L, Z_NULL, 0); + outd.total = 0; + + /* decompress data to output */ + strm->next_in = next; + strm->avail_in = have; + ret = inflateBack(strm, in, indp, out, &outd); + if (ret != Z_STREAM_END) break; + next = strm->next_in; + have = strm->avail_in; + strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */ + + /* check trailer */ + ret = Z_BUF_ERROR; + if (NEXT() != (outd.crc & 0xff) || + NEXT() != ((outd.crc >> 8) & 0xff) || + NEXT() != ((outd.crc >> 16) & 0xff) || + NEXT() != ((outd.crc >> 24) & 0xff)) { + /* crc error */ + if (last != -1) { + strm->msg = (char *)"incorrect data check"; + ret = Z_DATA_ERROR; + } + break; + } + if (NEXT() != 
(outd.total & 0xff) || + NEXT() != ((outd.total >> 8) & 0xff) || + NEXT() != ((outd.total >> 16) & 0xff) || + NEXT() != ((outd.total >> 24) & 0xff)) { + /* length error */ + if (last != -1) { + strm->msg = (char *)"incorrect length check"; + ret = Z_DATA_ERROR; + } + break; + } + + /* go back and look for another gzip stream */ + } + + /* clean up and return */ + return ret; +} + +/* Copy file attributes, from -> to, as best we can. This is best effort, so + no errors are reported. The mode bits, including suid, sgid, and the sticky + bit are copied (if allowed), the owner's user id and group id are copied + (again if allowed), and the access and modify times are copied. */ +local void copymeta(char *from, char *to) +{ + struct stat was; + struct utimbuf when; + + /* get all of from's Unix meta data, return if not a regular file */ + if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG) + return; + + /* set to's mode bits, ignore errors */ + (void)chmod(to, was.st_mode & 07777); + + /* copy owner's user and group, ignore errors */ + (void)chown(to, was.st_uid, was.st_gid); + + /* copy access and modify times, ignore errors */ + when.actime = was.st_atime; + when.modtime = was.st_mtime; + (void)utime(to, &when); +} + +/* Decompress the file inname to the file outname, or if test is true, just + decompress without writing and check the gzip trailer for integrity. If + inname is NULL or an empty string, read from stdin. If outname is NULL or + an empty string, write to stdout. strm is a pre-initialized inflateBack + structure. When appropriate, copy the file attributes from inname to + outname. + + gunzip() returns 1 if there is an out-of-memory error or an unexpected + return code from gunpipe(). Otherwise it returns 0.
+ */ +local int gunzip(z_stream *strm, char *inname, char *outname, int test) +{ + int ret; + int infile, outfile; + + /* open files */ + if (inname == NULL || *inname == 0) { + inname = "-"; + infile = 0; /* stdin */ + } + else { + infile = open(inname, O_RDONLY, 0); + if (infile == -1) { + fprintf(stderr, "gun cannot open %s\n", inname); + return 0; + } + } + if (test) + outfile = -1; + else if (outname == NULL || *outname == 0) { + outname = "-"; + outfile = 1; /* stdout */ + } + else { + outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666); + if (outfile == -1) { + close(infile); + fprintf(stderr, "gun cannot create %s\n", outname); + return 0; + } + } + errno = 0; + + /* decompress */ + ret = gunpipe(strm, infile, outfile); + if (outfile > 2) close(outfile); + if (infile > 2) close(infile); + + /* interpret result */ + switch (ret) { + case Z_OK: + case Z_ERRNO: + if (infile > 2 && outfile > 2) { + copymeta(inname, outname); /* copy attributes */ + unlink(inname); + } + if (ret == Z_ERRNO) + fprintf(stderr, "gun warning: trailing garbage ignored in %s\n", + inname); + break; + case Z_DATA_ERROR: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg); + break; + case Z_MEM_ERROR: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun out of memory error--aborting\n"); + return 1; + case Z_BUF_ERROR: + if (outfile > 2) unlink(outname); + if (strm->next_in != Z_NULL) { + fprintf(stderr, "gun write error on %s: %s\n", + outname, strerror(errno)); + } + else if (errno) { + fprintf(stderr, "gun read error on %s: %s\n", + inname, strerror(errno)); + } + else { + fprintf(stderr, "gun unexpected end of file on %s\n", + inname); + } + break; + default: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun internal error--aborting\n"); + return 1; + } + return 0; +} + +/* Process the gun command line arguments. See the command syntax near the + beginning of this source file. 
*/ +int main(int argc, char **argv) +{ + int ret, len, test; + char *outname; + unsigned char *window; + z_stream strm; + + /* initialize inflateBack state for repeated use */ + window = match; /* reuse LZW match buffer */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = inflateBackInit(&strm, 15, window); + if (ret != Z_OK) { + fprintf(stderr, "gun out of memory error--aborting\n"); + return 1; + } + + /* decompress each file to the same name with the suffix removed */ + argc--; + argv++; + test = 0; + if (argc && strcmp(*argv, "-h") == 0) { + fprintf(stderr, "gun 1.3 (12 Jun 2005)\n"); + fprintf(stderr, "Copyright (c) 2005 Mark Adler\n"); + fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n"); + return 0; + } + if (argc && strcmp(*argv, "-t") == 0) { + test = 1; + argc--; + argv++; + } + if (argc) + do { + if (test) + outname = NULL; + else { + len = (int)strlen(*argv); + if (strcmp(*argv + len - 3, ".gz") == 0 || + strcmp(*argv + len - 3, "-gz") == 0) + len -= 3; + else if (strcmp(*argv + len - 2, ".z") == 0 || + strcmp(*argv + len - 2, "-z") == 0 || + strcmp(*argv + len - 2, "_z") == 0 || + strcmp(*argv + len - 2, ".Z") == 0) + len -= 2; + else { + fprintf(stderr, "gun error: no gz type on %s--skipping\n", + *argv); + continue; + } + outname = malloc(len + 1); + if (outname == NULL) { + fprintf(stderr, "gun out of memory error--aborting\n"); + ret = 1; + break; + } + memcpy(outname, *argv, len); + outname[len] = 0; + } + ret = gunzip(&strm, *argv, outname, test); + if (outname != NULL) free(outname); + if (ret) break; + } while (argv++, --argc); + else + ret = gunzip(&strm, NULL, NULL, test); + + /* clean up */ + inflateBackEnd(&strm); + return ret; +} Added: external/zlib/examples/gzappend.c ============================================================================== --- (empty file) +++ external/zlib/examples/gzappend.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,500 @@ +/* gzappend -- command to append to a gzip file + + 
Copyright (C) 2003 Mark Adler, all rights reserved + version 1.1, 4 Nov 2003 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler at alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0 19 Oct 2003 - First version + * 1.1 4 Nov 2003 - Expand and clarify some comments and notes + * - Add version and copyright to help + * - Send help to stdout instead of stderr + * - Add some preemptive typecasts + * - Add L to constants in lseek() calls + * - Remove some debugging information in error messages + * - Use new data_type definition for zlib 1.2.1 + * - Simplify and unify file operations + * - Finish off gzip file in gztack() + * - Use deflatePrime() instead of adding empty blocks + * - Keep gzip file clean on appended file read errors + * - Use in-place rotate instead of auxiliary buffer + * (Why you ask? Because it was fun to write!) + */ + +/* + gzappend takes a gzip file and appends to it, compressing files from the + command line or data from stdin. The gzip file is written to directly, to + avoid copying that file, in case it's large. Note that this results in the + unfriendly behavior that if gzappend fails, the gzip file is corrupted.
+ + This program was written to illustrate the use of the new Z_BLOCK option of + zlib 1.2.x's inflate() function. This option returns from inflate() at each + block boundary to facilitate locating and modifying the last block bit at + the start of the final deflate block. Also whether using Z_BLOCK or not, + another required feature of zlib 1.2.x is that inflate() now provides the + number of unused bits in the last input byte used. gzappend will not work + with versions of zlib earlier than 1.2.1. + + gzappend first decompresses the gzip file internally, discarding all but + the last 32K of uncompressed data, and noting the location of the last block + bit and the number of unused bits in the last byte of the compressed data. + The gzip trailer containing the CRC-32 and length of the uncompressed data + is verified. This trailer will be later overwritten. + + Then the last block bit is cleared by seeking back in the file and rewriting + the byte that contains it. Seeking forward, the last byte of the compressed + data is saved along with the number of unused bits to initialize deflate. + + A deflate process is initialized, using the last 32K of the uncompressed + data from the gzip file to initialize the dictionary. If the total + uncompressed data was less than 32K, then all of it is used to initialize + the dictionary. The deflate output bit buffer is also initialized with the + last bits from the original deflate stream. From here on, the data to + append is simply compressed using deflate, and written to the gzip file. + When that is complete, the new CRC-32 and uncompressed length are written + as the trailer of the gzip file.
+ */ + +#include +#include +#include +#include +#include +#include "zlib.h" + +#define local static +#define LGCHUNK 14 +#define CHUNK (1U << LGCHUNK) +#define DSIZE 32768U + +/* print an error message and terminate with extreme prejudice */ +local void bye(char *msg1, char *msg2) +{ + fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2); + exit(1); +} + +/* return the greatest common divisor of a and b using Euclid's algorithm, + modified to be fast when one argument much greater than the other, and + coded to avoid unnecessary swapping */ +local unsigned gcd(unsigned a, unsigned b) +{ + unsigned c; + + while (a && b) + if (a > b) { + c = b; + while (a - c >= c) + c <<= 1; + a -= c; + } + else { + c = a; + while (b - c >= c) + c <<= 1; + b -= c; + } + return a + b; +} + +/* rotate list[0..len-1] left by rot positions, in place */ +local void rotate(unsigned char *list, unsigned len, unsigned rot) +{ + unsigned char tmp; + unsigned cycles; + unsigned char *start, *last, *to, *from; + + /* normalize rot and handle degenerate cases */ + if (len < 2) return; + if (rot >= len) rot %= len; + if (rot == 0) return; + + /* pointer to last entry in list */ + last = list + (len - 1); + + /* do simple left shift by one */ + if (rot == 1) { + tmp = *list; + memcpy(list, list + 1, len - 1); + *last = tmp; + return; + } + + /* do simple right shift by one */ + if (rot == len - 1) { + tmp = *last; + memmove(list + 1, list, len - 1); + *list = tmp; + return; + } + + /* otherwise do rotate as a set of cycles in place */ + cycles = gcd(len, rot); /* number of cycles */ + do { + start = from = list + cycles; /* start index is arbitrary */ + tmp = *from; /* save entry to be overwritten */ + for (;;) { + to = from; /* next step in cycle */ + from += rot; /* go right rot positions */ + if (from > last) from -= len; /* (pointer better not wrap) */ + if (from == start) break; /* all but one shifted */ + *to = *from; /* shift left */ + } + *to = tmp; /* complete the circle */ + } while 
(--cycles); +} + +/* structure for gzip file read operations */ +typedef struct { + int fd; /* file descriptor */ + int size; /* 1 << size is bytes in buf */ + unsigned left; /* bytes available at next */ + unsigned char *buf; /* buffer */ + unsigned char *next; /* next byte in buffer */ + char *name; /* file name for error messages */ +} file; + +/* reload buffer */ +local int readin(file *in) +{ + int len; + + len = read(in->fd, in->buf, 1 << in->size); + if (len == -1) bye("error reading ", in->name); + in->left = (unsigned)len; + in->next = in->buf; + return len; +} + +/* read from file in, exit if end-of-file */ +local int readmore(file *in) +{ + if (readin(in) == 0) bye("unexpected end of ", in->name); + return 0; +} + +#define read1(in) (in->left == 0 ? readmore(in) : 0, \ + in->left--, *(in->next)++) + +/* skip over n bytes of in */ +local void skip(file *in, unsigned n) +{ + unsigned bypass; + + if (n > in->left) { + n -= in->left; + bypass = n & ~((1U << in->size) - 1); + if (bypass) { + if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1) + bye("seeking ", in->name); + n -= bypass; + } + readmore(in); + if (n > in->left) + bye("unexpected end of ", in->name); + } + in->left -= n; + in->next += n; +} + +/* read a four-byte unsigned integer, little-endian, from in */ +unsigned long read4(file *in) +{ + unsigned long val; + + val = read1(in); + val += (unsigned)read1(in) << 8; + val += (unsigned long)read1(in) << 16; + val += (unsigned long)read1(in) << 24; + return val; +} + +/* skip over gzip header */ +local void gzheader(file *in) +{ + int flags; + unsigned n; + + if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file"); + if (read1(in) != 8) bye("unknown compression method in", in->name); + flags = read1(in); + if (flags & 0xe0) bye("unknown header flags set in", in->name); + skip(in, 6); + if (flags & 4) { + n = read1(in); + n += (unsigned)(read1(in)) << 8; + skip(in, n); + } + if (flags & 8) while (read1(in) != 0) ; + if (flags & 16) 
while (read1(in) != 0) ; + if (flags & 2) skip(in, 2); +} + +/* decompress gzip file "name", return strm with a deflate stream ready to + continue compression of the data in the gzip file, and return a file + descriptor pointing to where to write the compressed data -- the deflate + stream is initialized to compress using level "level" */ +local int gzscan(char *name, z_stream *strm, int level) +{ + int ret, lastbit, left, full; + unsigned have; + unsigned long crc, tot; + unsigned char *window; + off_t lastoff, end; + file gz; + + /* open gzip file */ + gz.name = name; + gz.fd = open(name, O_RDWR, 0); + if (gz.fd == -1) bye("cannot open ", name); + gz.buf = malloc(CHUNK); + if (gz.buf == NULL) bye("out of memory", ""); + gz.size = LGCHUNK; + gz.left = 0; + + /* skip gzip header */ + gzheader(&gz); + + /* prepare to decompress */ + window = malloc(DSIZE); + if (window == NULL) bye("out of memory", ""); + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + ret = inflateInit2(strm, -15); + if (ret != Z_OK) bye("out of memory", " or library mismatch"); + + /* decompress the deflate stream, saving append information */ + lastbit = 0; + lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + left = 0; + strm->avail_in = gz.left; + strm->next_in = gz.next; + crc = crc32(0L, Z_NULL, 0); + have = full = 0; + do { + /* if needed, get more input */ + if (strm->avail_in == 0) { + readmore(&gz); + strm->avail_in = gz.left; + strm->next_in = gz.next; + } + + /* set up output to next available section of sliding window */ + strm->avail_out = DSIZE - have; + strm->next_out = window + have; + + /* inflate and check for errors */ + ret = inflate(strm, Z_BLOCK); + if (ret == Z_STREAM_ERROR) bye("internal stream error!", ""); + if (ret == Z_MEM_ERROR) bye("out of memory", ""); + if (ret == Z_DATA_ERROR) + bye("invalid compressed data--format violated in", name); + + /* update crc and sliding window pointer */ + crc = crc32(crc, window + have, DSIZE - have - 
strm->avail_out); + if (strm->avail_out) + have = DSIZE - strm->avail_out; + else { + have = 0; + full = 1; + } + + /* process end of block */ + if (strm->data_type & 128) { + if (strm->data_type & 64) + left = strm->data_type & 0x1f; + else { + lastbit = strm->data_type & 0x1f; + lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in; + } + } + } while (ret != Z_STREAM_END); + inflateEnd(strm); + gz.left = strm->avail_in; + gz.next = strm->next_in; + + /* save the location of the end of the compressed data */ + end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + + /* check gzip trailer and save total for deflate */ + if (crc != read4(&gz)) + bye("invalid compressed data--crc mismatch in ", name); + tot = strm->total_out; + if ((tot & 0xffffffffUL) != read4(&gz)) + bye("invalid compressed data--length mismatch in", name); + + /* if not at end of file, warn */ + if (gz.left || readin(&gz)) + fprintf(stderr, + "gzappend warning: junk at end of gzip file overwritten\n"); + + /* clear last block bit */ + lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET); + if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); + *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7))); + lseek(gz.fd, -1L, SEEK_CUR); + if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name); + + /* if window wrapped, build dictionary from window by rotating */ + if (full) { + rotate(window, DSIZE, have); + have = DSIZE; + } + + /* set up deflate stream with window, crc, total_in, and leftover bits */ + ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); + if (ret != Z_OK) bye("out of memory", ""); + deflateSetDictionary(strm, window, have); + strm->adler = crc; + strm->total_in = tot; + if (left) { + lseek(gz.fd, --end, SEEK_SET); + if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); + deflatePrime(strm, 8 - left, *gz.buf); + } + lseek(gz.fd, end, SEEK_SET); + + /* clean up and return */ + free(window); + free(gz.buf); + return gz.fd; +} 
+ +/* append file "name" to gzip file gd using deflate stream strm -- if last + is true, then finish off the deflate stream at the end */ +local void gztack(char *name, int gd, z_stream *strm, int last) +{ + int fd, len, ret; + unsigned left; + unsigned char *in, *out; + + /* open file to compress and append */ + fd = 0; + if (name != NULL) { + fd = open(name, O_RDONLY, 0); + if (fd == -1) + fprintf(stderr, "gzappend warning: %s not found, skipping ...\n", + name); + } + + /* allocate buffers */ + in = fd == -1 ? NULL : malloc(CHUNK); + out = malloc(CHUNK); + if (out == NULL) bye("out of memory", ""); + + /* compress input file and append to gzip file */ + do { + /* get more input */ + len = fd == -1 ? 0 : read(fd, in, CHUNK); + if (len == -1) { + fprintf(stderr, + "gzappend warning: error reading %s, skipping rest ...\n", + name); + len = 0; + } + strm->avail_in = (unsigned)len; + strm->next_in = in; + if (len) strm->adler = crc32(strm->adler, in, (unsigned)len); + + /* compress and write all available output */ + do { + strm->avail_out = CHUNK; + strm->next_out = out; + ret = deflate(strm, last && len == 0 ? 
Z_FINISH : Z_NO_FLUSH); + left = CHUNK - strm->avail_out; + while (left) { + len = write(gd, out + CHUNK - strm->avail_out - left, left); + if (len == -1) bye("writing gzip file", ""); + left -= (unsigned)len; + } + } while (strm->avail_out == 0 && ret != Z_STREAM_END); + } while (len != 0); + + /* write trailer after last entry */ + if (last) { + deflateEnd(strm); + out[0] = (unsigned char)(strm->adler); + out[1] = (unsigned char)(strm->adler >> 8); + out[2] = (unsigned char)(strm->adler >> 16); + out[3] = (unsigned char)(strm->adler >> 24); + out[4] = (unsigned char)(strm->total_in); + out[5] = (unsigned char)(strm->total_in >> 8); + out[6] = (unsigned char)(strm->total_in >> 16); + out[7] = (unsigned char)(strm->total_in >> 24); + len = 8; + do { + ret = write(gd, out + 8 - len, len); + if (ret == -1) bye("writing gzip file", ""); + len -= ret; + } while (len); + close(gd); + } + + /* clean up and return */ + free(out); + if (in != NULL) free(in); + if (fd > 0) close(fd); +} + +/* process the compression level option if present, scan the gzip file, and + append the specified files, or append the data from stdin if no other file + names are provided on the command line -- the gzip file must be writable + and seekable */ +int main(int argc, char **argv) +{ + int gd, level; + z_stream strm; + + /* ignore command name */ + argv++; + + /* provide usage if no arguments */ + if (*argv == NULL) { + printf("gzappend 1.1 (4 Nov 2003) Copyright (C) 2003 Mark Adler\n"); + printf( + "usage: gzappend [-level] file.gz [ addthis [ andthis ... 
]]\n"); + return 0; + } + + /* set compression level */ + level = Z_DEFAULT_COMPRESSION; + if (argv[0][0] == '-') { + if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0) + bye("invalid compression level", ""); + level = argv[0][1] - '0'; + if (*++argv == NULL) bye("no gzip file name after options", ""); + } + + /* prepare to append to gzip file */ + gd = gzscan(*argv++, &strm, level); + + /* append files on command line, or from stdin if none */ + if (*argv == NULL) + gztack(NULL, gd, &strm, 1); + else + do { + gztack(*argv, gd, &strm, argv[1] == NULL); + } while (*++argv != NULL); + return 0; +} Added: external/zlib/examples/gzjoin.c ============================================================================== --- (empty file) +++ external/zlib/examples/gzjoin.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,448 @@ +/* gzjoin -- command to join gzip files into one gzip file + + Copyright (C) 2004 Mark Adler, all rights reserved + version 1.0, 11 Dec 2004 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Mark Adler madler at alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0 11 Dec 2004 - First version + * 1.1 12 Jun 2005 - Changed ssize_t to long for portability + */ + +/* + gzjoin takes one or more gzip files on the command line and writes out a + single gzip file that will uncompress to the concatenation of the + uncompressed data from the individual gzip files. gzjoin does this without + having to recompress any of the data and without having to calculate a new + crc32 for the concatenated uncompressed data. gzjoin does however have to + decompress all of the input data in order to find the bits in the compressed + data that need to be modified to concatenate the streams. + + gzjoin does not do an integrity check on the input gzip files other than + checking the gzip header and decompressing the compressed data. They are + otherwise assumed to be complete and correct. + + Each joint between gzip files removes at least 18 bytes of previous trailer + and subsequent header, and inserts an average of about three bytes to the + compressed data in order to connect the streams. The output gzip file + has a minimal ten-byte gzip header with no file name or modification time. + + This program was written to illustrate the use of the Z_BLOCK option of + inflate() and the crc32_combine() function. gzjoin will not compile with + versions of zlib earlier than 1.2.3. 
+ */ + +#include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */ +#include <stdlib.h> /* exit(), malloc(), free() */ +#include <fcntl.h> /* open() */ +#include <unistd.h> /* close(), read(), lseek() */ +#include "zlib.h" + /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ + +#define local static + +/* exit with an error (return a value to allow use in an expression) */ +local int bail(char *why1, char *why2) +{ + fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2); + exit(1); + return 0; +} + +/* -- simple buffered file input with access to the buffer -- */ + +#define CHUNK 32768 /* must be a power of two and fit in unsigned */ + +/* bin buffered input file type */ +typedef struct { + char *name; /* name of file for error messages */ + int fd; /* file descriptor */ + unsigned left; /* bytes remaining at next */ + unsigned char *next; /* next byte to read */ + unsigned char *buf; /* allocated buffer of length CHUNK */ +} bin; + +/* close a buffered file and free allocated memory */ +local void bclose(bin *in) +{ + if (in != NULL) { + if (in->fd != -1) + close(in->fd); + if (in->buf != NULL) + free(in->buf); + free(in); + } +} + +/* open a buffered file for input, return a pointer to type bin, or NULL on + failure */ +local bin *bopen(char *name) +{ + bin *in; + + in = malloc(sizeof(bin)); + if (in == NULL) + return NULL; + in->buf = malloc(CHUNK); + in->fd = open(name, O_RDONLY, 0); + if (in->buf == NULL || in->fd == -1) { + bclose(in); + return NULL; + } + in->left = 0; + in->next = in->buf; + in->name = name; + return in; +} + +/* load buffer from file, return -1 on read error, 0 or 1 on success, with + 1 indicating that end-of-file was reached */ +local int bload(bin *in) +{ + long len; + + if (in == NULL) + return -1; + if (in->left != 0) + return 0; + in->next = in->buf; + do { + len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left); + if (len < 0) + return -1; + in->left += (unsigned)len; + } while (len != 0 && in->left < CHUNK); + return len == 0
? 1 : 0; +} + +/* get a byte from the file, bail if end of file */ +#define bget(in) (in->left ? 0 : bload(in), \ + in->left ? (in->left--, *(in->next)++) : \ + bail("unexpected end of file on ", in->name)) + +/* get a four-byte little-endian unsigned integer from file */ +local unsigned long bget4(bin *in) +{ + unsigned long val; + + val = bget(in); + val += (unsigned long)(bget(in)) << 8; + val += (unsigned long)(bget(in)) << 16; + val += (unsigned long)(bget(in)) << 24; + return val; +} + +/* skip bytes in file */ +local void bskip(bin *in, unsigned skip) +{ + /* check pointer */ + if (in == NULL) + return; + + /* easy case -- skip bytes in buffer */ + if (skip <= in->left) { + in->left -= skip; + in->next += skip; + return; + } + + /* skip what's in buffer, discard buffer contents */ + skip -= in->left; + in->left = 0; + + /* seek past multiples of CHUNK bytes */ + if (skip > CHUNK) { + unsigned left; + + left = skip & (CHUNK - 1); + if (left == 0) { + /* exact number of chunks: seek all the way minus one byte to check + for end-of-file with a read */ + lseek(in->fd, skip - 1, SEEK_CUR); + if (read(in->fd, in->buf, 1) != 1) + bail("unexpected end of file on ", in->name); + return; + } + + /* skip the integral chunks, update skip with remainder */ + lseek(in->fd, skip - left, SEEK_CUR); + skip = left; + } + + /* read more input and skip remainder */ + bload(in); + if (skip > in->left) + bail("unexpected end of file on ", in->name); + in->left -= skip; + in->next += skip; +} + +/* -- end of buffered input functions -- */ + +/* skip the gzip header from file in */ +local void gzhead(bin *in) +{ + int flags; + + /* verify gzip magic header and compression method */ + if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) + bail(in->name, " is not a valid gzip file"); + + /* get and verify flags */ + flags = bget(in); + if ((flags & 0xe0) != 0) + bail("unknown reserved bits set in ", in->name); + + /* skip modification time, extra flags, and os */ + bskip(in, 
6); + + /* skip extra field if present */ + if (flags & 4) { + unsigned len; + + len = bget(in); + len += (unsigned)(bget(in)) << 8; + bskip(in, len); + } + + /* skip file name if present */ + if (flags & 8) + while (bget(in) != 0) + ; + + /* skip comment if present */ + if (flags & 16) + while (bget(in) != 0) + ; + + /* skip header crc if present */ + if (flags & 2) + bskip(in, 2); +} + +/* write a four-byte little-endian unsigned integer to out */ +local void put4(unsigned long val, FILE *out) +{ + putc(val & 0xff, out); + putc((val >> 8) & 0xff, out); + putc((val >> 16) & 0xff, out); + putc((val >> 24) & 0xff, out); +} + +/* Load up zlib stream from buffered input, bail if end of file */ +local void zpull(z_streamp strm, bin *in) +{ + if (in->left == 0) + bload(in); + if (in->left == 0) + bail("unexpected end of file on ", in->name); + strm->avail_in = in->left; + strm->next_in = in->next; +} + +/* Write header for gzip file to out and initialize trailer. */ +local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) +{ + fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); + *crc = crc32(0L, Z_NULL, 0); + *tot = 0; +} + +/* Copy the compressed data from name, zeroing the last block bit of the last + block if clr is true, and adding empty blocks as needed to get to a byte + boundary. If clr is false, then the last block becomes the last block of + the output, and the gzip trailer is written. crc and tot maintains the + crc and length (modulo 2^32) of the output for the trailer. The resulting + gzip file is written to out. gzinit() must be called before the first call + of gzcopy() to write the gzip header and to initialize crc and tot. 
*/ +local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, + FILE *out) +{ + int ret; /* return value from zlib functions */ + int pos; /* where the "last block" bit is in byte */ + int last; /* true if processing the last block */ + bin *in; /* buffered input file */ + unsigned char *start; /* start of compressed data in buffer */ + unsigned char *junk; /* buffer for uncompressed data -- discarded */ + z_off_t len; /* length of uncompressed data (support > 4 GB) */ + z_stream strm; /* zlib inflate stream */ + + /* open gzip file and skip header */ + in = bopen(name); + if (in == NULL) + bail("could not open ", name); + gzhead(in); + + /* allocate buffer for uncompressed data and initialize raw inflate + stream */ + junk = malloc(CHUNK); + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -15); + if (junk == NULL || ret != Z_OK) + bail("out of memory", ""); + + /* inflate and copy compressed data, clear last-block bit if requested */ + len = 0; + zpull(&strm, in); + start = strm.next_in; + last = start[0] & 1; + if (last && clr) + start[0] &= ~1; + strm.avail_out = 0; + for (;;) { + /* if input used and output done, write used input and get more */ + if (strm.avail_in == 0 && strm.avail_out != 0) { + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + + /* decompress -- return early when end-of-block reached */ + strm.avail_out = CHUNK; + strm.next_out = junk; + ret = inflate(&strm, Z_BLOCK); + switch (ret) { + case Z_MEM_ERROR: + bail("out of memory", ""); + case Z_DATA_ERROR: + bail("invalid compressed data in ", in->name); + } + + /* update length of uncompressed data */ + len += CHUNK - strm.avail_out; + + /* check for block boundary (only get this when block copied out) */ + if (strm.data_type & 128) { + /* if that was the last block, then done */ + if (last) + break; + + /* number of unused 
bits in last byte */ + pos = strm.data_type & 7; + + /* find the next last-block bit */ + if (pos != 0) { + /* next last-block bit is in last used byte */ + pos = 0x100 >> pos; + last = strm.next_in[-1] & pos; + if (last && clr) + strm.next_in[-1] &= ~pos; + } + else { + /* next last-block bit is in next unused byte */ + if (strm.avail_in == 0) { + /* don't have that byte yet -- get it */ + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + last = strm.next_in[0] & 1; + if (last && clr) + strm.next_in[0] &= ~1; + } + } + } + + /* update buffer with unused input */ + in->left = strm.avail_in; + in->next = strm.next_in; + + /* copy used input, write empty blocks to get to byte boundary */ + pos = strm.data_type & 7; + fwrite(start, 1, in->next - start - 1, out); + last = in->next[-1]; + if (pos == 0 || !clr) + /* already at byte boundary, or last file: write last byte */ + putc(last, out); + else { + /* append empty blocks to last byte */ + last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */ + if (pos & 1) { + /* odd -- append an empty stored block */ + putc(last, out); + if (pos == 1) + putc(0, out); /* two more bits in block header */ + fwrite("\0\0\xff\xff", 1, 4, out); + } + else { + /* even -- append 1, 2, or 3 empty fixed blocks */ + switch (pos) { + case 6: + putc(last | 8, out); + last = 0; + case 4: + putc(last | 0x20, out); + last = 0; + case 2: + putc(last | 0x80, out); + putc(0, out); + } + } + } + + /* update crc and tot */ + *crc = crc32_combine(*crc, bget4(in), len); + *tot += (unsigned long)len; + + /* clean up */ + inflateEnd(&strm); + free(junk); + bclose(in); + + /* write trailer if this is the last gzip file */ + if (!clr) { + put4(*crc, out); + put4(*tot, out); + } +} + +/* join the gzip files on the command line, write result to stdout */ +int main(int argc, char **argv) +{ + unsigned long crc, tot; /* running crc and total uncompressed length */ + + /* skip command name */ + 
argc--; + argv++; + + /* show usage if no arguments */ + if (argc == 0) { + fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n", + stderr); + return 0; + } + + /* join gzip files on command line and write to stdout */ + gzinit(&crc, &tot, stdout); + while (argc--) + gzcopy(*argv++, argc, &crc, &tot, stdout); + + /* done */ + return 0; +} Added: external/zlib/examples/gzlog.c ============================================================================== --- (empty file) +++ external/zlib/examples/gzlog.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,413 @@ +/* + * gzlog.c + * Copyright (C) 2004 Mark Adler + * For conditions of distribution and use, see copyright notice in gzlog.h + * version 1.0, 26 Nov 2004 + * + */ + +#include /* memcmp() */ +#include /* malloc(), free(), NULL */ +#include /* size_t, off_t */ +#include /* read(), close(), sleep(), ftruncate(), */ + /* lseek() */ +#include /* open() */ +#include /* flock() */ +#include "zlib.h" /* deflateInit2(), deflate(), deflateEnd() */ + +#include "gzlog.h" /* interface */ +#define local static + +/* log object structure */ +typedef struct { + int id; /* object identifier */ + int fd; /* log file descriptor */ + off_t extra; /* offset of extra "ap" subfield */ + off_t mark_off; /* offset of marked data */ + off_t last_off; /* offset of last block */ + unsigned long crc; /* uncompressed crc */ + unsigned long len; /* uncompressed length (modulo 2^32) */ + unsigned stored; /* length of current stored block */ +} gz_log; + +#define GZLOGID 19334 /* gz_log object identifier */ + +#define LOCK_RETRY 1 /* retry lock once a second */ +#define LOCK_PATIENCE 1200 /* try about twenty minutes before forcing */ + +/* acquire a lock on a file */ +local int lock(int fd) +{ + int patience; + + /* try to lock every LOCK_RETRY seconds for LOCK_PATIENCE seconds */ + patience = LOCK_PATIENCE; + do { + if (flock(fd, LOCK_EX + LOCK_NB) == 0) + return 0; + (void)sleep(LOCK_RETRY); + patience -= LOCK_RETRY; + } while (patience > 
0); + + /* we've run out of patience -- give up */ + return -1; +} + +/* release lock */ +local void unlock(int fd) +{ + (void)flock(fd, LOCK_UN); +} + +/* release a log object */ +local void log_clean(gz_log *log) +{ + unlock(log->fd); + (void)close(log->fd); + free(log); +} + +/* read an unsigned long from a byte buffer little-endian */ +local unsigned long make_ulg(unsigned char *buf) +{ + int n; + unsigned long val; + + val = (unsigned long)(*buf++); + for (n = 8; n < 32; n += 8) + val += (unsigned long)(*buf++) << n; + return val; +} + +/* read an off_t from a byte buffer little-endian */ +local off_t make_off(unsigned char *buf) +{ + int n; + off_t val; + + val = (off_t)(*buf++); + for (n = 8; n < 64; n += 8) + val += (off_t)(*buf++) << n; + return val; +} + +/* write an unsigned long little-endian to byte buffer */ +local void dice_ulg(unsigned long val, unsigned char *buf) +{ + int n; + + for (n = 0; n < 4; n++) { + *buf++ = val & 0xff; + val >>= 8; + } +} + +/* write an off_t little-endian to byte buffer */ +local void dice_off(off_t val, unsigned char *buf) +{ + int n; + + for (n = 0; n < 8; n++) { + *buf++ = val & 0xff; + val >>= 8; + } +} + +/* initial, empty gzip file for appending */ +local char empty_gz[] = { + 0x1f, 0x8b, /* magic gzip id */ + 8, /* compression method is deflate */ + 4, /* there is an extra field */ + 0, 0, 0, 0, /* no modification time provided */ + 0, 0xff, /* no extra flags, no OS */ + 20, 0, 'a', 'p', 16, 0, /* extra field with "ap" subfield */ + 32, 0, 0, 0, 0, 0, 0, 0, /* offset of uncompressed data */ + 32, 0, 0, 0, 0, 0, 0, 0, /* offset of last block */ + 1, 0, 0, 0xff, 0xff, /* empty stored block (last) */ + 0, 0, 0, 0, /* crc */ + 0, 0, 0, 0 /* uncompressed length */ +}; + +/* initialize a log object with locking */ +void *gzlog_open(char *path) +{ + unsigned xlen; + unsigned char temp[20]; + unsigned sub_len; + int good; + gz_log *log; + + /* allocate log structure */ + log = malloc(sizeof(gz_log)); + if (log == NULL) + 
return NULL; + log->id = GZLOGID; + + /* open file, creating it if necessary, and locking it */ + log->fd = open(path, O_RDWR | O_CREAT, 0600); + if (log->fd < 0) { + free(log); + return NULL; + } + if (lock(log->fd)) { + close(log->fd); + free(log); + return NULL; + } + + /* if file is empty, write new gzip stream */ + if (lseek(log->fd, 0, SEEK_END) == 0) { + if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) { + log_clean(log); + return NULL; + } + } + + /* check gzip header */ + (void)lseek(log->fd, 0, SEEK_SET); + if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f || + temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) { + log_clean(log); + return NULL; + } + + /* process extra field to find "ap" sub-field */ + xlen = temp[10] + (temp[11] << 8); + good = 0; + while (xlen) { + if (xlen < 4 || read(log->fd, temp, 4) != 4) + break; + sub_len = temp[2]; + sub_len += temp[3] << 8; + xlen -= 4; + if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) { + good = 1; + break; + } + if (xlen < sub_len) + break; + (void)lseek(log->fd, sub_len, SEEK_CUR); + xlen -= sub_len; + } + if (!good) { + log_clean(log); + return NULL; + } + + /* read in "ap" sub-field */ + log->extra = lseek(log->fd, 0, SEEK_CUR); + if (read(log->fd, temp, 16) != 16) { + log_clean(log); + return NULL; + } + log->mark_off = make_off(temp); + log->last_off = make_off(temp + 8); + + /* get crc, length of gzip file */ + (void)lseek(log->fd, log->last_off, SEEK_SET); + if (read(log->fd, temp, 13) != 13 || + memcmp(temp, "\001\000\000\377\377", 5) != 0) { + log_clean(log); + return NULL; + } + log->crc = make_ulg(temp + 5); + log->len = make_ulg(temp + 9); + + /* set up to write over empty last block */ + (void)lseek(log->fd, log->last_off + 5, SEEK_SET); + log->stored = 0; + return (void *)log; +} + +/* maximum amount to put in a stored block before starting a new one */ +#define MAX_BLOCK 16384 + +/* write a block to a log object */ +int gzlog_write(void *obj, char *data, size_t len) 
+{ + size_t some; + unsigned char temp[5]; + gz_log *log; + + /* check object */ + log = (gz_log *)obj; + if (log == NULL || log->id != GZLOGID) + return 1; + + /* write stored blocks until all of the input is written */ + do { + some = MAX_BLOCK - log->stored; + if (some > len) + some = len; + if (write(log->fd, data, some) != some) + return 1; + log->crc = crc32(log->crc, data, some); + log->len += some; + len -= some; + data += some; + log->stored += some; + + /* if the stored block is full, end it and start another */ + if (log->stored == MAX_BLOCK) { + (void)lseek(log->fd, log->last_off, SEEK_SET); + temp[0] = 0; + dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), + temp + 1); + if (write(log->fd, temp, 5) != 5) + return 1; + log->last_off = lseek(log->fd, log->stored, SEEK_CUR); + (void)lseek(log->fd, 5, SEEK_CUR); + log->stored = 0; + } + } while (len); + return 0; +} + +/* recompress the remaining stored deflate data in place */ +local int recomp(gz_log *log) +{ + z_stream strm; + size_t len, max; + unsigned char *in; + unsigned char *out; + unsigned char temp[16]; + + /* allocate space and read it all in (it's around 1 MB) */ + len = log->last_off - log->mark_off; + max = len + (len >> 12) + (len >> 14) + 11; + out = malloc(max); + if (out == NULL) + return 1; + in = malloc(len); + if (in == NULL) { + free(out); + return 1; + } + (void)lseek(log->fd, log->mark_off, SEEK_SET); + if (read(log->fd, in, len) != len) { + free(in); + free(out); + return 1; + } + + /* recompress in memory, decoding stored data as we go */ + /* note: this assumes that unsigned is four bytes or more */ + /* consider not making that assumption */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8, + Z_DEFAULT_STRATEGY) != Z_OK) { + free(in); + free(out); + return 1; + } + strm.next_in = in; + strm.avail_out = max; + strm.next_out = out; + while (len >= 5) { + if (strm.next_in[0] != 0) + 
break; + strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8); + strm.next_in += 5; + len -= 5; + if (strm.avail_in != 0) { + if (len < strm.avail_in) + break; + len -= strm.avail_in; + (void)deflate(&strm, Z_NO_FLUSH); + if (strm.avail_in != 0 || strm.avail_out == 0) + break; + } + } + (void)deflate(&strm, Z_SYNC_FLUSH); + (void)deflateEnd(&strm); + free(in); + if (len != 0 || strm.avail_out == 0) { + free(out); + return 1; + } + + /* overwrite stored data with compressed data */ + (void)lseek(log->fd, log->mark_off, SEEK_SET); + len = max - strm.avail_out; + if (write(log->fd, out, len) != len) { + free(out); + return 1; + } + free(out); + + /* write last empty block, crc, and length */ + log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR); + temp[0] = 1; + dice_ulg(0xffffL << 16, temp + 1); + dice_ulg(log->crc, temp + 5); + dice_ulg(log->len, temp + 9); + if (write(log->fd, temp, 13) != 13) + return 1; + + /* truncate file to discard remaining stored data and old trailer */ + ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR)); + + /* update extra field to point to new last empty block */ + (void)lseek(log->fd, log->extra, SEEK_SET); + dice_off(log->mark_off, temp); + dice_off(log->last_off, temp + 8); + if (write(log->fd, temp, 16) != 16) + return 1; + return 0; +} + +/* maximum accumulation of stored blocks before compressing */ +#define MAX_STORED 1048576 + +/* close log object */ +int gzlog_close(void *obj) +{ + unsigned char temp[8]; + gz_log *log; + + /* check object */ + log = (gz_log *)obj; + if (log == NULL || log->id != GZLOGID) + return 1; + + /* go to start of most recent block being written */ + (void)lseek(log->fd, log->last_off, SEEK_SET); + + /* if some stuff was put there, update block */ + if (log->stored) { + temp[0] = 0; + dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), + temp + 1); + if (write(log->fd, temp, 5) != 5) + return 1; + log->last_off = lseek(log->fd, log->stored, SEEK_CUR); + } + + /* write last block (empty) 
*/ + if (write(log->fd, "\001\000\000\377\377", 5) != 5) + return 1; + + /* write updated crc and uncompressed length */ + dice_ulg(log->crc, temp); + dice_ulg(log->len, temp + 4); + if (write(log->fd, temp, 8) != 8) + return 1; + + /* put offset of that last block in gzip extra block */ + (void)lseek(log->fd, log->extra + 8, SEEK_SET); + dice_off(log->last_off, temp); + if (write(log->fd, temp, 8) != 8) + return 1; + + /* if more than 1 MB stored, then time to compress it */ + if (log->last_off - log->mark_off > MAX_STORED) { + if (recomp(log)) + return 1; + } + + /* unlock and close file */ + log_clean(log); + return 0; +} Added: external/zlib/examples/gzlog.h ============================================================================== --- (empty file) +++ external/zlib/examples/gzlog.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,58 @@ +/* gzlog.h + Copyright (C) 2004 Mark Adler, all rights reserved + version 1.0, 26 Nov 2004 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler at alumni.caltech.edu + */ + +/* + The gzlog object allows writing short messages to a gzipped log file, + opening the log file locked for small bursts, and then closing it. 
The log + object works by appending stored data to the gzip file until 1 MB has been + accumulated. At that time, the stored data is compressed, and replaces the + uncompressed data in the file. The log file is truncated to its new size at + that time. After closing, the log file is always a valid gzip file that can + be decompressed to recover what was written. + + A gzip header "extra" field contains two file offsets for appending. The + first points to just after the last compressed data. The second points to + the last stored block in the deflate stream, which is empty. All of the + data between those pointers is uncompressed. + */ + +/* Open a gzlog object, creating the log file if it does not exist. Return + NULL on error. Note that gzlog_open() could take a long time to return if + there is difficulty in locking the file. */ +void *gzlog_open(char *path); + +/* Write to a gzlog object. Return non-zero on error. This function will + simply write data to the file uncompressed. Compression of the data + will not occur until gzlog_close() is called. It is expected that + gzlog_write() is used for a short message, and then gzlog_close() is + called. If a large amount of data is to be written, then the application + should write no more than 1 MB at a time with gzlog_write() before + calling gzlog_close() and then gzlog_open() again. */ +int gzlog_write(void *log, char *data, size_t len); + +/* Close a gzlog object. Return non-zero on error. The log file is locked + until this function is called. This function will compress stored data + at the end of the gzip file if at least 1 MB has been accumulated. Note + that the file will not be a valid gzip file until this function completes.
+ */ +int gzlog_close(void *log); Added: external/zlib/examples/zlib_how.html ============================================================================== --- (empty file) +++ external/zlib/examples/zlib_how.html Tue Jan 3 07:42:59 2006 @@ -0,0 +1,523 @@ + + + + +zlib Usage Example + + + +

    zlib Usage Example

    +We often get questions about how the deflate() and inflate() functions should be used. +Users wonder when they should provide more input, when they should use more output, +what to do with a Z_BUF_ERROR, how to make sure the process terminates properly, and +so on. So for those who have read zlib.h (a few times), and +would like further edification, below is an annotated example in C of simple routines to compress and decompress +from an input file to an output file using deflate() and inflate() respectively. The +annotations are interspersed between lines of the code. So please read between the lines. +We hope this helps explain some of the intricacies of zlib. +

    +Without further ado, here is the program zpipe.c: +

    
    +/* zpipe.c: example of proper use of zlib's inflate() and deflate()
    +   Not copyrighted -- provided to the public domain
    +   Version 1.2  9 November 2004  Mark Adler */
    +
    +/* Version history:
    +   1.0  30 Oct 2004  First version
    +   1.1   8 Nov 2004  Add void casting for unused return values
    +                     Use switch statement for inflate() return values
    +   1.2   9 Nov 2004  Add assertions to document zlib guarantees
    + */
    +
    +We now include the header files for the required definitions. From +stdio.h we use fopen(), fread(), fwrite(), +feof(), ferror(), and fclose() for file i/o, and +fputs() for error messages. From string.h we use +strcmp() for command line argument processing. +From assert.h we use the assert() macro. +From zlib.h +we use the basic compression functions deflateInit(), +deflate(), and deflateEnd(), and the basic decompression +functions inflateInit(), inflate(), and +inflateEnd(). +
    
    +#include <stdio.h>
    +#include <string.h>
    +#include <assert.h>
    +#include "zlib.h"
    +
    +CHUNK is simply the buffer size for feeding data to and pulling data +from the zlib routines. Larger buffer sizes would be more efficient, +especially for inflate(). If the memory is available, buffer sizes +on the order of 128K or 256K bytes should be used. +
    
    +#define CHUNK 16384
    +
    +The def() routine compresses data from an input file to an output file. The output data +will be in the zlib format, which is different from the gzip or zip +formats. The zlib format has a very small header of only two bytes to identify it as +a zlib stream and to provide decoding information, and a four-byte trailer with a fast +check value to verify the integrity of the uncompressed data after decoding. +
    
    +/* Compress from file source to file dest until EOF on source.
    +   def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
    +   allocated for processing, Z_STREAM_ERROR if an invalid compression
    +   level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
    +   version of the library linked do not match, or Z_ERRNO if there is
    +   an error reading or writing the files. */
    +int def(FILE *source, FILE *dest, int level)
    +{
    +
    +Here are the local variables for def(). ret will be used for zlib +return codes. flush will keep track of the current flushing state for deflate(), +which is either no flushing, or flush to completion after the end of the input file is reached. +have is the amount of data returned from deflate(). The strm structure +is used to pass information to and from the zlib routines, and to maintain the +deflate() state. in and out are the input and output buffers for +deflate(). +
    
    +    int ret, flush;
    +    unsigned have;
    +    z_stream strm;
    +    char in[CHUNK];
    +    char out[CHUNK];
    +
    +The first thing we do is to initialize the zlib state for compression using +deflateInit(). This must be done before the first use of deflate(). +The zalloc, zfree, and opaque fields in the strm +structure must be initialized before calling deflateInit(). Here they are +set to the zlib constant Z_NULL to request that zlib use +the default memory allocation routines. An application may also choose to provide +custom memory allocation routines here. deflateInit() will allocate on the +order of 256K bytes for the internal state. +(See zlib Technical Details.) +

    +deflateInit() is called with a pointer to the structure to be initialized and +the compression level, which is an integer in the range of -1 to 9. Lower compression +levels result in faster execution, but less compression. Higher levels result in +greater compression, but slower execution. The zlib constant Z_DEFAULT_COMPRESSION, +equal to -1, +provides a good compromise between compression and speed and is equivalent to level 6. +Level 0 actually does no compression at all, and in fact expands the data slightly to produce +the zlib format (it is not a byte-for-byte copy of the input). +More advanced applications of zlib +may use deflateInit2() here instead. Such an application may want to reduce how +much memory will be used, at some price in compression. Or it may need to request a +gzip header and trailer instead of a zlib header and trailer, or raw +encoding with no header or trailer at all. +

    +We must check the return value of deflateInit() against the zlib constant +Z_OK to make sure that it was able to +allocate memory for the internal state, and that the provided arguments were valid. +deflateInit() will also check that the version of zlib that the zlib.h +file came from matches the version of zlib actually linked with the program. This +is especially important for environments in which zlib is a shared library. +

    +Note that an application can initialize multiple, independent zlib streams, which can +operate in parallel. The state information maintained in the structure allows the zlib +routines to be reentrant. +

    
    +    /* allocate deflate state */
    +    strm.zalloc = Z_NULL;
    +    strm.zfree = Z_NULL;
    +    strm.opaque = Z_NULL;
    +    ret = deflateInit(&strm, level);
    +    if (ret != Z_OK)
    +        return ret;
    +
    +With the pleasantries out of the way, now we can get down to business. The outer do-loop +reads all of the input file and exits at the bottom of the loop once end-of-file is reached. +This loop contains the only call of deflate(). So we must make sure that all of the +input data has been processed and that all of the output data has been generated and consumed +before we fall out of the loop at the bottom. +
    
    +    /* compress until end of file */
    +    do {
    +
    +We start off by reading data from the input file. The number of bytes read is put directly +into avail_in, and a pointer to those bytes is put into next_in. We also +check to see if end-of-file on the input has been reached. If we are at the end of file, then flush is set to the +zlib constant Z_FINISH, which is later passed to deflate() to +indicate that this is the last chunk of input data to compress. We need to use feof() +to check for end-of-file as opposed to seeing if fewer than CHUNK bytes have been read. The +reason is that if the input file length is an exact multiple of CHUNK, we will miss +the fact that we got to the end-of-file, and not know to tell deflate() to finish +up the compressed stream. If we are not yet at the end of the input, then the zlib +constant Z_NO_FLUSH will be passed to deflate to indicate that we are still +in the middle of the uncompressed data. +

    +If there is an error in reading from the input file, the process is aborted with +deflateEnd() being called to free the allocated zlib state before returning +the error. We wouldn't want a memory leak, now would we? deflateEnd() can be called +at any time after the state has been initialized. Once that's done, deflateInit() (or +deflateInit2()) would have to be called to start a new compression process. There is +no point here in checking the deflateEnd() return code. The deallocation can't fail. +

    
    +        strm.avail_in = fread(in, 1, CHUNK, source);
    +        if (ferror(source)) {
    +            (void)deflateEnd(&strm);
    +            return Z_ERRNO;
    +        }
    +        flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
    +        strm.next_in = in;
    +
    +The inner do-loop passes our chunk of input data to deflate(), and then +keeps calling deflate() until it is done producing output. Once there is no more +new output, deflate() is guaranteed to have consumed all of the input, i.e., +avail_in will be zero. +
    
    +        /* run deflate() on input until output buffer not full, finish
    +           compression if all of source has been read in */
    +        do {
    +
    +Output space is provided to deflate() by setting avail_out to the number +of available output bytes and next_out to a pointer to that space. +
    
    +            strm.avail_out = CHUNK;
    +            strm.next_out = out;
    +
    +Now we call the compression engine itself, deflate(). It takes as many of the +avail_in bytes at next_in as it can process, and writes as many as +avail_out bytes to next_out. Those counters and pointers are then +updated past the input data consumed and the output data written. It is the amount of +output space available that may limit how much input is consumed. +Hence the inner loop to make sure that +all of the input is consumed by providing more output space each time. Since avail_in +and next_in are updated by deflate(), we don't have to mess with those +between deflate() calls until it's all used up. +

    +The parameters to deflate() are a pointer to the strm structure containing +the input and output information and the internal compression engine state, and a parameter +indicating whether and how to flush data to the output. Normally deflate will consume +several K bytes of input data before producing any output (except for the header), in order +to accumulate statistics on the data for optimum compression. It will then put out a burst of +compressed data, and proceed to consume more input before the next burst. Eventually, +deflate() +must be told to terminate the stream, complete the compression with provided input data, and +write out the trailer check value. deflate() will continue to compress normally as long +as the flush parameter is Z_NO_FLUSH. Once the Z_FINISH parameter is provided, +deflate() will begin to complete the compressed output stream. However depending on how +much output space is provided, deflate() may have to be called several times until it +has provided the complete compressed stream, even after it has consumed all of the input. The flush +parameter must continue to be Z_FINISH for those subsequent calls. +

    +There are other values of the flush parameter that are used in more advanced applications. You can +force deflate() to produce a burst of output that encodes all of the input data provided +so far, even if it wouldn't have otherwise, for example to control data latency on a link with +compressed data. You can also ask that deflate() do that as well as erase any history up to +that point so that what follows can be decompressed independently, for example for random access +applications. Both requests will degrade compression by an amount depending on how often such +requests are made. +

    +deflate() has a return value that can indicate errors, yet we do not check it here. Why +not? Well, it turns out that deflate() can do no wrong here. Let's go through +deflate()'s return values and dispense with them one by one. The possible values are +Z_OK, Z_STREAM_END, Z_STREAM_ERROR, or Z_BUF_ERROR. Z_OK +is, well, ok. Z_STREAM_END is also ok and will be returned for the last call of +deflate(). This is already guaranteed by calling deflate() with Z_FINISH +until it has no more output. Z_STREAM_ERROR is only possible if the stream is not +initialized properly, but we did initialize it properly. There is no harm in checking for +Z_STREAM_ERROR here, for example to check for the possibility that some +other part of the application inadvertently clobbered the memory containing the zlib state. +Z_BUF_ERROR will be explained further below, but +suffice it to say that this is simply an indication that deflate() could not consume +more input or produce more output. deflate() can be called again with more output space +or more available input, which it will be in this code. +

    
    +            ret = deflate(&strm, flush);    /* no bad return value */
    +            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
    +
    +Now we compute how much output deflate() provided on the last call, which is the +difference between how much space was provided before the call, and how much output space +is still available after the call. Then that data, if any, is written to the output file. +We can then reuse the output buffer for the next call of deflate(). Again if there +is a file i/o error, we call deflateEnd() before returning to avoid a memory leak. +
    
    +            have = CHUNK - strm.avail_out;
    +            if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
    +                (void)deflateEnd(&strm);
    +                return Z_ERRNO;
    +            }
    +
    +The inner do-loop is repeated until the last deflate() call fails to fill the +provided output buffer. Then we know that deflate() has done as much as it can with +the provided input, and that all of that input has been consumed. We can then fall out of this +loop and reuse the input buffer. +

    +The way we tell that deflate() has no more output is by seeing that it did not fill +the output buffer, leaving avail_out greater than zero. However suppose that +deflate() has no more output, but just so happened to exactly fill the output buffer! +avail_out is zero, and we can't tell that deflate() has done all it can. +As far as we know, deflate() +has more output for us. So we call it again. But now deflate() produces no output +at all, and avail_out remains unchanged as CHUNK. That deflate() call +wasn't able to do anything, either consume input or produce output, and so it returns +Z_BUF_ERROR. (See, I told you I'd cover this later.) However this is not a problem at +all. Now we finally have the desired indication that deflate() is really done, +and so we drop out of the inner loop to provide more input to deflate(). +

    +With flush set to Z_FINISH, this final set of deflate() calls will +complete the output stream. Once that is done, subsequent calls of deflate() would return +Z_STREAM_ERROR if the flush parameter is not Z_FINISH, and do no more processing +until the state is reinitialized. +

    +Some applications of zlib have two loops that call deflate() +instead of the single inner loop we have here. The first loop would call +without flushing and feed all of the data to deflate(). The second loop would call +deflate() with no more +data and the Z_FINISH parameter to complete the process. As you can see from this +example, that can be avoided by simply keeping track of the current flush state. +

    
    +        } while (strm.avail_out == 0);
    +        assert(strm.avail_in == 0);     /* all input will be used */
    +
    +Now we check to see if we have already processed all of the input file. That information was +saved in the flush variable, so we see if that was set to Z_FINISH. If so, +then we're done and we fall out of the outer loop. We're guaranteed to get Z_STREAM_END +from the last deflate() call, since we ran it until the last chunk of input was +consumed and all of the output was generated. +
    
    +        /* done when last data in file processed */
    +    } while (flush != Z_FINISH);
    +    assert(ret == Z_STREAM_END);        /* stream will be complete */
    +
    +The process is complete, but we still need to deallocate the state to avoid a memory leak +(or rather more like a memory hemorrhage if you didn't do this). Then +finally we can return with a happy return value. +
    
    +    /* clean up and return */
    +    (void)deflateEnd(&strm);
    +    return Z_OK;
    +}
    +
    +Now we do the same thing for decompression in the inf() routine. inf() +decompresses what is hopefully a valid zlib stream from the input file and writes the +uncompressed data to the output file. Much of the discussion above for def() +applies to inf() as well, so the discussion here will focus on the differences between +the two. +
    
    +/* Decompress from file source to file dest until stream ends or EOF.
    +   inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
    +   allocated for processing, Z_DATA_ERROR if the deflate data is
    +   invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
    +   the version of the library linked do not match, or Z_ERRNO if there
    +   is an error reading or writing the files. */
    +int inf(FILE *source, FILE *dest)
    +{
    +
    +The local variables have the same functionality as they do for def(). The +only difference is that there is no flush variable, since inflate() +can tell from the zlib stream itself when the stream is complete. +
    
    +    int ret;
    +    unsigned have;
    +    z_stream strm;
    +    char in[CHUNK];
    +    char out[CHUNK];
    +
    +The initialization of the state is the same, except that there is no compression level, +of course, and two more elements of the structure are initialized. avail_in +and next_in must be initialized before calling inflateInit(). This +is because the application has the option to provide the start of the zlib stream in +order for inflateInit() to have access to information about the compression +method to aid in memory allocation. In the current implementation of zlib +(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of +inflate() anyway. However those fields must be initialized since later versions +of zlib that provide more compression methods may take advantage of this interface. +In any case, no decompression is performed by inflateInit(), so the +avail_out and next_out fields do not need to be initialized before calling. +

    +Here avail_in is set to zero and next_in is set to Z_NULL to +indicate that no input data is being provided. +

    
    +    /* allocate inflate state */
    +    strm.zalloc = Z_NULL;
    +    strm.zfree = Z_NULL;
    +    strm.opaque = Z_NULL;
    +    strm.avail_in = 0;
    +    strm.next_in = Z_NULL;
    +    ret = inflateInit(&strm);
    +    if (ret != Z_OK)
    +        return ret;
    +
    +The outer do-loop decompresses input until inflate() indicates +that it has reached the end of the compressed data and has produced all of the uncompressed +output. This is in contrast to def() which processes all of the input file. +If end-of-file is reached before the compressed data self-terminates, then the compressed +data is incomplete and an error is returned. +
    
    +    /* decompress until deflate stream ends or end of file */
    +    do {
    +
    +We read input data and set the strm structure accordingly. If we've reached the +end of the input file, then we leave the outer loop and report an error, since the +compressed data is incomplete. Note that we may read more data than is eventually consumed +by inflate(), if the input file continues past the zlib stream. +For applications where zlib streams are embedded in other data, this routine would +need to be modified to return the unused data, or at least indicate how much of the input +data was not used, so the application would know where to pick up after the zlib stream. +
    
    +        strm.avail_in = fread(in, 1, CHUNK, source);
    +        if (ferror(source)) {
    +            (void)inflateEnd(&strm);
    +            return Z_ERRNO;
    +        }
    +        if (strm.avail_in == 0)
    +            break;
    +        strm.next_in = in;
    +
    +The inner do-loop has the same function it did in def(), which is to +keep calling inflate() until it has generated all of the output it can with the +provided input. +
    
    +        /* run inflate() on input until output buffer not full */
    +        do {
    +
    +Just like in def(), the same output space is provided for each call of inflate(). +
    
    +            strm.avail_out = CHUNK;
    +            strm.next_out = out;
    +
    +Now we run the decompression engine itself. There is no need to adjust the flush parameter, since +the zlib format is self-terminating. The main difference here is that there are +return values that we need to pay attention to. Z_DATA_ERROR +indicates that inflate() detected an error in the zlib compressed data format, +which means that either the data is not a zlib stream to begin with, or that the data was +corrupted somewhere along the way since it was compressed. The other error to be processed is +Z_MEM_ERROR, which can occur since memory allocation is deferred until inflate() +needs it, unlike deflate(), whose memory is allocated at the start by deflateInit(). +

    +Advanced applications may use +deflateSetDictionary() to prime deflate() with a set of likely data to improve the +first 32K or so of compression. This is noted in the zlib header, so inflate() +requests that that dictionary be provided before it can start to decompress. Without the dictionary, +correct decompression is not possible. For this routine, we have no idea what the dictionary is, +so the Z_NEED_DICT indication is converted to a Z_DATA_ERROR. +

    +inflate() can also return Z_STREAM_ERROR, which should not be possible here, +but could be checked for as noted above for def(). Z_BUF_ERROR does not need to be +checked for here, for the same reasons noted for def(). Z_STREAM_END will be +checked for later. +

    
    +            ret = inflate(&strm, Z_NO_FLUSH);
    +            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
    +            switch (ret) {
    +            case Z_NEED_DICT:
    +                ret = Z_DATA_ERROR;     /* and fall through */
    +            case Z_DATA_ERROR:
    +            case Z_MEM_ERROR:
    +                (void)inflateEnd(&strm);
    +                return ret;
    +            }
    +
    +The output of inflate() is handled identically to that of deflate(). +
    
    +            have = CHUNK - strm.avail_out;
    +            if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
    +                (void)inflateEnd(&strm);
    +                return Z_ERRNO;
    +            }
    +
    +The inner do-loop ends when inflate() has no more output as indicated +by not filling the output buffer, just as for deflate(). In this case, we cannot +assert that strm.avail_in will be zero, since the deflate stream may end before the file +does. +
    
    +        } while (strm.avail_out == 0);
    +
    +The outer do-loop ends when inflate() reports that it has reached the +end of the input zlib stream, has completed the decompression and integrity +check, and has provided all of the output. This is indicated by the inflate() +return value Z_STREAM_END. The inner loop is guaranteed to leave ret +equal to Z_STREAM_END if the last chunk of the input file read contained the end +of the zlib stream. So if the return value is not Z_STREAM_END, the +loop continues to read more input. +
    
    +        /* done when inflate() says it's done */
    +    } while (ret != Z_STREAM_END);
    +
    +At this point, decompression successfully completed, or we broke out of the loop due to no +more data being available from the input file. If the last inflate() return value +is not Z_STREAM_END, then the zlib stream was incomplete and a data error +is returned. Otherwise, we return with a happy return value. Of course, inflateEnd() +is called first to avoid a memory leak. +
    
    +    /* clean up and return */
    +    (void)inflateEnd(&strm);
    +    return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
    +}
    +
    +That ends the routines that directly use zlib. The following routines make this +a command-line program by running data through the above routines from stdin to +stdout, and handling any errors reported by def() or inf(). +

    +zerr() is used to interpret the possible error codes from def() +and inf(), as detailed in their comments above, and print out an error message. +Note that these are only a subset of the possible return values from deflate() +and inflate(). +

    
    +/* report a zlib or i/o error */
    +void zerr(int ret)
    +{
    +    fputs("zpipe: ", stderr);
    +    switch (ret) {
    +    case Z_ERRNO:
    +        if (ferror(stdin))
    +            fputs("error reading stdin\n", stderr);
    +        if (ferror(stdout))
    +            fputs("error writing stdout\n", stderr);
    +        break;
    +    case Z_STREAM_ERROR:
    +        fputs("invalid compression level\n", stderr);
    +        break;
    +    case Z_DATA_ERROR:
    +        fputs("invalid or incomplete deflate data\n", stderr);
    +        break;
    +    case Z_MEM_ERROR:
    +        fputs("out of memory\n", stderr);
    +        break;
    +    case Z_VERSION_ERROR:
    +        fputs("zlib version mismatch!\n", stderr);
    +    }
    +}
    +
    +Here is the main() routine used to test def() and inf(). The +zpipe command is simply a compression pipe from stdin to stdout, if +no arguments are given, or it is a decompression pipe if zpipe -d is used. If any other +arguments are provided, no compression or decompression is performed. Instead a usage +message is displayed. Examples are zpipe < foo.txt > foo.txt.z to compress, and +zpipe -d < foo.txt.z > foo.txt to decompress. +
    
    +/* compress or decompress from stdin to stdout */
    +int main(int argc, char **argv)
    +{
    +    int ret;
    +
    +    /* do compression if no arguments */
    +    if (argc == 1) {
    +        ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
    +        if (ret != Z_OK)
    +            zerr(ret);
    +        return ret;
    +    }
    +
    +    /* do decompression if -d specified */
    +    else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
    +        ret = inf(stdin, stdout);
    +        if (ret != Z_OK)
    +            zerr(ret);
    +        return ret;
    +    }
    +
    +    /* otherwise, report usage */
    +    else {
    +        fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
    +        return 1;
    +    }
    +}
    +
    +
    +Copyright (c) 2004 by Mark Adler
    Last modified 13 November 2004
    + + Added: external/zlib/examples/zpipe.c ============================================================================== --- (empty file) +++ external/zlib/examples/zpipe.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,191 @@ +/* zpipe.c: example of proper use of zlib's inflate() and deflate() + Not copyrighted -- provided to the public domain + Version 1.2 9 November 2004 Mark Adler */ + +/* Version history: + 1.0 30 Oct 2004 First version + 1.1 8 Nov 2004 Add void casting for unused return values + Use switch statement for inflate() return values + 1.2 9 Nov 2004 Add assertions to document zlib guarantees + 1.3 6 Apr 2005 Remove incorrect assertion in inf() + */ + +#include +#include +#include +#include "zlib.h" + +#define CHUNK 16384 + +/* Compress from file source to file dest until EOF on source. + def() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_STREAM_ERROR if an invalid compression + level is supplied, Z_VERSION_ERROR if the version of zlib.h and the + version of the library linked do not match, or Z_ERRNO if there is + an error reading or writing the files. */ +int def(FILE *source, FILE *dest, int level) +{ + int ret, flush; + unsigned have; + z_stream strm; + char in[CHUNK]; + char out[CHUNK]; + + /* allocate deflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = deflateInit(&strm, level); + if (ret != Z_OK) + return ret; + + /* compress until end of file */ + do { + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } + flush = feof(source) ? 
Z_FINISH : Z_NO_FLUSH; + strm.next_in = in; + + /* run deflate() on input until output buffer not full, finish + compression if all of source has been read in */ + do { + strm.avail_out = CHUNK; + strm.next_out = out; + ret = deflate(&strm, flush); /* no bad return value */ + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } + } while (strm.avail_out == 0); + assert(strm.avail_in == 0); /* all input will be used */ + + /* done when last data in file processed */ + } while (flush != Z_FINISH); + assert(ret == Z_STREAM_END); /* stream will be complete */ + + /* clean up and return */ + (void)deflateEnd(&strm); + return Z_OK; +} + +/* Decompress from file source to file dest until stream ends or EOF. + inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_DATA_ERROR if the deflate data is + invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and + the version of the library linked do not match, or Z_ERRNO if there + is an error reading or writing the files. 
*/ +int inf(FILE *source, FILE *dest) +{ + int ret; + unsigned have; + z_stream strm; + char in[CHUNK]; + char out[CHUNK]; + + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); + if (ret != Z_OK) + return ret; + + /* decompress until deflate stream ends or end of file */ + do { + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + if (strm.avail_in == 0) + break; + strm.next_in = in; + + /* run inflate() on input until output buffer not full */ + do { + strm.avail_out = CHUNK; + strm.next_out = out; + ret = inflate(&strm, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + switch (ret) { + case Z_NEED_DICT: + ret = Z_DATA_ERROR; /* and fall through */ + case Z_DATA_ERROR: + case Z_MEM_ERROR: + (void)inflateEnd(&strm); + return ret; + } + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + } while (strm.avail_out == 0); + + /* done when inflate() says it's done */ + } while (ret != Z_STREAM_END); + + /* clean up and return */ + (void)inflateEnd(&strm); + return ret == Z_STREAM_END ? 
Z_OK : Z_DATA_ERROR; +} + +/* report a zlib or i/o error */ +void zerr(int ret) +{ + fputs("zpipe: ", stderr); + switch (ret) { + case Z_ERRNO: + if (ferror(stdin)) + fputs("error reading stdin\n", stderr); + if (ferror(stdout)) + fputs("error writing stdout\n", stderr); + break; + case Z_STREAM_ERROR: + fputs("invalid compression level\n", stderr); + break; + case Z_DATA_ERROR: + fputs("invalid or incomplete deflate data\n", stderr); + break; + case Z_MEM_ERROR: + fputs("out of memory\n", stderr); + break; + case Z_VERSION_ERROR: + fputs("zlib version mismatch!\n", stderr); + } +} + +/* compress or decompress from stdin to stdout */ +int main(int argc, char **argv) +{ + int ret; + + /* do compression if no arguments */ + if (argc == 1) { + ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* do decompression if -d specified */ + else if (argc == 2 && strcmp(argv[1], "-d") == 0) { + ret = inf(stdin, stdout); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* otherwise, report usage */ + else { + fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); + return 1; + } +} Added: external/zlib/examples/zran.c ============================================================================== --- (empty file) +++ external/zlib/examples/zran.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,404 @@ +/* zran.c -- example of zlib/gzip stream indexing and random access + * Copyright (C) 2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + Version 1.0 29 May 2005 Mark Adler */ + +/* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary() + for random access of a compressed file. A file containing a zlib or gzip + stream is provided on the command line. The compressed stream is decoded in + its entirety, and an index built with access points about every SPAN bytes + in the uncompressed output. 
The compressed file is left open, and can then + be read randomly, having to decompress on the average SPAN/2 uncompressed + bytes before getting to the desired block of data. + + An access point can be created at the start of any deflate block, by saving + the starting file offset and bit of that block, and the 32K bytes of + uncompressed data that precede that block. Also the uncompressed offset of + that block is saved to provide a reference for locating a desired starting + point in the uncompressed stream. build_index() works by decompressing the + input zlib or gzip stream a block at a time, and at the end of each block + deciding if enough uncompressed data has gone by to justify the creation of + a new access point. If so, that point is saved in a data structure that + grows as needed to accommodate the points. + + To use the index, an offset in the uncompressed data is provided, for which + the latest access point at or preceding that offset is located in the index. + The input file is positioned to the specified location in the index, and if + necessary the first few bits of the compressed data is read from the file. + inflate is initialized with those bits and the 32K of uncompressed data, and + the decompression then proceeds until the desired offset in the file is + reached. Then the decompression continues to read the desired uncompressed + data from the file. + + Another approach would be to generate the index on demand. In that case, + requests for random access reads from the compressed data would try to use + the index, but if a read far enough past the end of the index is required, + then further index entries would be generated and added. + + There is some fair bit of overhead to starting inflation for the random + access, mainly copying the 32K byte dictionary. So if small pieces of the + file are being accessed, it would make sense to implement a cache to hold + some lookahead and avoid many calls to extract() for small lengths. 
+ + Another way to build an index would be to use inflateCopy(). That would + not be constrained to have access points at block boundaries, but requires + more memory per access point, and also cannot be saved to file due to the + use of pointers in the state. The approach here allows for storage of the + index in a file. + */ + +#include +#include +#include +#include "zlib.h" + +#define local static + +#define SPAN 1048576L /* desired distance between access points */ +#define WINSIZE 32768U /* sliding window size */ +#define CHUNK 16384 /* file input buffer size */ + +/* access point entry */ +struct point { + off_t out; /* corresponding offset in uncompressed data */ + off_t in; /* offset in input file of first full byte */ + int bits; /* number of bits (1-7) from byte at in - 1, or 0 */ + unsigned char window[WINSIZE]; /* preceding 32K of uncompressed data */ +}; + +/* access point list */ +struct access { + int have; /* number of list entries filled in */ + int size; /* number of list entries allocated */ + struct point *list; /* allocated list */ +}; + +/* Deallocate an index built by build_index() */ +local void free_index(struct access *index) +{ + if (index != NULL) { + free(index->list); + free(index); + } +} + +/* Add an entry to the access point list. If out of memory, deallocate the + existing list and return NULL. 
*/ +local struct access *addpoint(struct access *index, int bits, + off_t in, off_t out, unsigned left, unsigned char *window) +{ + struct point *next; + + /* if list is empty, create it (start with eight points) */ + if (index == NULL) { + index = malloc(sizeof(struct access)); + if (index == NULL) return NULL; + index->list = malloc(sizeof(struct point) << 3); + if (index->list == NULL) { + free(index); + return NULL; + } + index->size = 8; + index->have = 0; + } + + /* if list is full, make it bigger */ + else if (index->have == index->size) { + index->size <<= 1; + next = realloc(index->list, sizeof(struct point) * index->size); + if (next == NULL) { + free_index(index); + return NULL; + } + index->list = next; + } + + /* fill in entry and increment how many we have */ + next = index->list + index->have; + next->bits = bits; + next->in = in; + next->out = out; + if (left) + memcpy(next->window, window + WINSIZE - left, left); + if (left < WINSIZE) + memcpy(next->window + left, window, WINSIZE - left); + index->have++; + + /* return list, possibly reallocated */ + return index; +} + +/* Make one entire pass through the compressed stream and build an index, with + access points about every span bytes of uncompressed output -- span is + chosen to balance the speed of random access against the memory requirements + of the list, about 32K bytes per access point. Note that data after the end + of the first zlib or gzip stream in the file is ignored. build_index() + returns the number of access points on success (>= 1), Z_MEM_ERROR for out + of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a + file read error. On success, *built points to the resulting index. 
*/ +local int build_index(FILE *in, off_t span, struct access **built) +{ + int ret; + off_t totin, totout; /* our own total counters to avoid 4GB limit */ + off_t last; /* totout value of last access point */ + struct access *index; /* access points being generated */ + z_stream strm; + unsigned char input[CHUNK]; + unsigned char window[WINSIZE]; + + /* initialize inflate */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */ + if (ret != Z_OK) + return ret; + + /* inflate the input, maintain a sliding window, and build an index -- this + also validates the integrity of the compressed data using the check + information at the end of the gzip or zlib stream */ + totin = totout = last = 0; + index = NULL; /* will be allocated by first addpoint() */ + strm.avail_out = 0; + do { + /* get some compressed data from input file */ + strm.avail_in = fread(input, 1, CHUNK, in); + if (ferror(in)) { + ret = Z_ERRNO; + goto build_index_error; + } + if (strm.avail_in == 0) { + ret = Z_DATA_ERROR; + goto build_index_error; + } + strm.next_in = input; + + /* process all of that, or until end of stream */ + do { + /* reset sliding window if necessary */ + if (strm.avail_out == 0) { + strm.avail_out = WINSIZE; + strm.next_out = window; + } + + /* inflate until out of input, output, or at end of block -- + update the total input and output counters */ + totin += strm.avail_in; + totout += strm.avail_out; + ret = inflate(&strm, Z_BLOCK); /* return at end of block */ + totin -= strm.avail_in; + totout -= strm.avail_out; + if (ret == Z_NEED_DICT) + ret = Z_DATA_ERROR; + if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) + goto build_index_error; + if (ret == Z_STREAM_END) + break; + + /* if at end of block, consider adding an index entry (note that if + data_type indicates an end-of-block, then all of the + uncompressed data from that block has been 
delivered, and none + of the compressed data after that block has been consumed, + except for up to seven bits) -- the totout == 0 provides an + entry point after the zlib or gzip header, and assures that the + index always has at least one access point; we avoid creating an + access point after the last block by checking bit 6 of data_type + */ + if ((strm.data_type & 128) && !(strm.data_type & 64) && + (totout == 0 || totout - last > span)) { + index = addpoint(index, strm.data_type & 7, totin, + totout, strm.avail_out, window); + if (index == NULL) { + ret = Z_MEM_ERROR; + goto build_index_error; + } + last = totout; + } + } while (strm.avail_in != 0); + } while (ret != Z_STREAM_END); + + /* clean up and return index (release unused entries in list) */ + (void)inflateEnd(&strm); + index = realloc(index, sizeof(struct point) * index->have); + index->size = index->have; + *built = index; + return index->size; + + /* return error */ + build_index_error: + (void)inflateEnd(&strm); + if (index != NULL) + free_index(index); + return ret; +} + +/* Use the index to read len bytes from offset into buf, return bytes read or + negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past + the end of the uncompressed data, then extract() will return a value less + than len, indicating how much as actually read into buf. This function + should not return a data error unless the file was modified since the index + was generated. extract() may also return Z_ERRNO if there is an error on + reading or seeking the input file. 
*/ +local int extract(FILE *in, struct access *index, off_t offset, + unsigned char *buf, int len) +{ + int ret, skip; + z_stream strm; + struct point *here; + unsigned char input[CHUNK]; + unsigned char discard[WINSIZE]; + + /* proceed only if something reasonable to do */ + if (len < 0) + return 0; + + /* find where in stream to start */ + here = index->list; + ret = index->have; + while (--ret && here[1].out <= offset) + here++; + + /* initialize file and inflate state to start there */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -15); /* raw inflate */ + if (ret != Z_OK) + return ret; + ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET); + if (ret == -1) + goto extract_ret; + if (here->bits) { + ret = getc(in); + if (ret == -1) { + ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR; + goto extract_ret; + } + (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits)); + } + (void)inflateSetDictionary(&strm, here->window, WINSIZE); + + /* skip uncompressed bytes until offset reached, then satisfy request */ + offset -= here->out; + strm.avail_in = 0; + skip = 1; /* while skipping to offset */ + do { + /* define where to put uncompressed data, and how much */ + if (offset == 0 && skip) { /* at offset now */ + strm.avail_out = len; + strm.next_out = buf; + skip = 0; /* only do this once */ + } + if (offset > WINSIZE) { /* skip WINSIZE bytes */ + strm.avail_out = WINSIZE; + strm.next_out = discard; + offset -= WINSIZE; + } + else if (offset != 0) { /* last skip */ + strm.avail_out = (unsigned)offset; + strm.next_out = discard; + offset = 0; + } + + /* uncompress until avail_out filled, or end of stream */ + do { + if (strm.avail_in == 0) { + strm.avail_in = fread(input, 1, CHUNK, in); + if (ferror(in)) { + ret = Z_ERRNO; + goto extract_ret; + } + if (strm.avail_in == 0) { + ret = Z_DATA_ERROR; + goto extract_ret; + } + strm.next_in = input; + } + ret = 
inflate(&strm, Z_NO_FLUSH); /* normal inflate */ + if (ret == Z_NEED_DICT) + ret = Z_DATA_ERROR; + if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) + goto extract_ret; + if (ret == Z_STREAM_END) + break; + } while (strm.avail_out != 0); + + /* if reach end of stream, then don't keep trying to get more */ + if (ret == Z_STREAM_END) + break; + + /* do until offset reached and requested data read, or stream ends */ + } while (skip); + + /* compute number of uncompressed bytes read after offset */ + ret = skip ? 0 : len - strm.avail_out; + + /* clean up and return bytes read or error */ + extract_ret: + (void)inflateEnd(&strm); + return ret; +} + +/* Demonstrate the use of build_index() and extract() by processing the file + provided on the command line, and the extracting 16K from about 2/3rds of + the way through the uncompressed output, and writing that to stdout. */ +int main(int argc, char **argv) +{ + int len; + off_t offset; + FILE *in; + struct access *index; + unsigned char buf[CHUNK]; + + /* open input file */ + if (argc != 2) { + fprintf(stderr, "usage: zran file.gz\n"); + return 1; + } + in = fopen(argv[1], "rb"); + if (in == NULL) { + fprintf(stderr, "zran: could not open %s for reading\n", argv[1]); + return 1; + } + + /* build index */ + len = build_index(in, SPAN, &index); + if (len < 0) { + fclose(in); + switch (len) { + case Z_MEM_ERROR: + fprintf(stderr, "zran: out of memory\n"); + break; + case Z_DATA_ERROR: + fprintf(stderr, "zran: compressed data error in %s\n", argv[1]); + break; + case Z_ERRNO: + fprintf(stderr, "zran: read error on %s\n", argv[1]); + break; + default: + fprintf(stderr, "zran: error %d while building index\n", len); + } + return 1; + } + fprintf(stderr, "zran: built index with %d access points\n", len); + + /* use index by reading some bytes from an arbitrary offset */ + offset = (index->list[index->have - 1].out << 1) / 3; + len = extract(in, index, offset, buf, CHUNK); + if (len < 0) + fprintf(stderr, "zran: extraction failed: 
%s error\n", + len == Z_MEM_ERROR ? "out of memory" : "input corrupted"); + else { + fwrite(buf, 1, len, stdout); + fprintf(stderr, "zran: extracted %d bytes at %llu\n", len, offset); + } + + /* clean up and exit */ + free_index(index); + fclose(in); + return 0; +} Added: external/zlib/gzio.c ============================================================================== --- (empty file) +++ external/zlib/gzio.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,1026 @@ +/* gzio.c -- IO on .gz files + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Compile this file with -DNO_GZCOMPRESS to avoid the compression code. + */ + +/* @(#) $Id$ */ + +#include + +#include "zutil.h" + +#ifdef NO_DEFLATE /* for compatibility with old definition */ +# define NO_GZCOMPRESS +#endif + +#ifndef NO_DUMMY_DECL +struct internal_state {int dummy;}; /* for buggy compilers */ +#endif + +#ifndef Z_BUFSIZE +# ifdef MAXSEG_64K +# define Z_BUFSIZE 4096 /* minimize memory usage for 16-bit DOS */ +# else +# define Z_BUFSIZE 16384 +# endif +#endif +#ifndef Z_PRINTF_BUFSIZE +# define Z_PRINTF_BUFSIZE 4096 +#endif + +#ifdef __MVS__ +# pragma map (fdopen , "\174\174FDOPEN") + FILE *fdopen(int, const char *); +#endif + +#ifndef STDC +extern voidp malloc OF((uInt size)); +extern void free OF((voidpf ptr)); +#endif + +#define ALLOC(size) malloc(size) +#define TRYFREE(p) {if (p) free(p);} + +static int const gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */ + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define RESERVED 0xE0 /* bits 5..7: reserved */ + +typedef struct gz_stream { + z_stream stream; + int z_err; /* error code for last stream 
operation */ + int z_eof; /* set if end of input file */ + FILE *file; /* .gz file */ + Byte *inbuf; /* input buffer */ + Byte *outbuf; /* output buffer */ + uLong crc; /* crc32 of uncompressed data */ + char *msg; /* error message */ + char *path; /* path name for debugging only */ + int transparent; /* 1 if input file is not a .gz file */ + char mode; /* 'w' or 'r' */ + z_off_t start; /* start of compressed data in file (header skipped) */ + z_off_t in; /* bytes into deflate or inflate */ + z_off_t out; /* bytes out of deflate or inflate */ + int back; /* one character push-back */ + int last; /* true if push-back is last character */ +} gz_stream; + + +local gzFile gz_open OF((const char *path, const char *mode, int fd)); +local int do_flush OF((gzFile file, int flush)); +local int get_byte OF((gz_stream *s)); +local void check_header OF((gz_stream *s)); +local int destroy OF((gz_stream *s)); +local void putLong OF((FILE *file, uLong x)); +local uLong getLong OF((gz_stream *s)); + +/* =========================================================================== + Opens a gzip (.gz) file for reading or writing. The mode parameter + is as in fopen ("rb" or "wb"). The file is given either by file descriptor + or path name (if fd == -1). + gz_open returns NULL if the file could not be opened or if there was + insufficient memory to allocate the (de)compression state; errno + can be checked to distinguish the two cases (if errno is zero, the + zlib error is Z_MEM_ERROR). 
+*/ +local gzFile gz_open (path, mode, fd) + const char *path; + const char *mode; + int fd; +{ + int err; + int level = Z_DEFAULT_COMPRESSION; /* compression level */ + int strategy = Z_DEFAULT_STRATEGY; /* compression strategy */ + char *p = (char*)mode; + gz_stream *s; + char fmode[80]; /* copy of mode, without the compression level */ + char *m = fmode; + + if (!path || !mode) return Z_NULL; + + s = (gz_stream *)ALLOC(sizeof(gz_stream)); + if (!s) return Z_NULL; + + s->stream.zalloc = (alloc_func)0; + s->stream.zfree = (free_func)0; + s->stream.opaque = (voidpf)0; + s->stream.next_in = s->inbuf = Z_NULL; + s->stream.next_out = s->outbuf = Z_NULL; + s->stream.avail_in = s->stream.avail_out = 0; + s->file = NULL; + s->z_err = Z_OK; + s->z_eof = 0; + s->in = 0; + s->out = 0; + s->back = EOF; + s->crc = crc32(0L, Z_NULL, 0); + s->msg = NULL; + s->transparent = 0; + + s->path = (char*)ALLOC(strlen(path)+1); + if (s->path == NULL) { + return destroy(s), (gzFile)Z_NULL; + } + strcpy(s->path, path); /* do this early for debugging */ + + s->mode = '\0'; + do { + if (*p == 'r') s->mode = 'r'; + if (*p == 'w' || *p == 'a') s->mode = 'w'; + if (*p >= '0' && *p <= '9') { + level = *p - '0'; + } else if (*p == 'f') { + strategy = Z_FILTERED; + } else if (*p == 'h') { + strategy = Z_HUFFMAN_ONLY; + } else if (*p == 'R') { + strategy = Z_RLE; + } else { + *m++ = *p; /* copy the mode */ + } + } while (*p++ && m != fmode + sizeof(fmode)); + if (s->mode == '\0') return destroy(s), (gzFile)Z_NULL; + + if (s->mode == 'w') { +#ifdef NO_GZCOMPRESS + err = Z_STREAM_ERROR; +#else + err = deflateInit2(&(s->stream), level, + Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, strategy); + /* windowBits is passed < 0 to suppress zlib header */ + + s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); +#endif + if (err != Z_OK || s->outbuf == Z_NULL) { + return destroy(s), (gzFile)Z_NULL; + } + } else { + s->stream.next_in = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); + + err = inflateInit2(&(s->stream), 
-MAX_WBITS); + /* windowBits is passed < 0 to tell that there is no zlib header. + * Note that in this case inflate *requires* an extra "dummy" byte + * after the compressed stream in order to complete decompression and + * return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are + * present after the compressed stream. + */ + if (err != Z_OK || s->inbuf == Z_NULL) { + return destroy(s), (gzFile)Z_NULL; + } + } + s->stream.avail_out = Z_BUFSIZE; + + errno = 0; + s->file = fd < 0 ? F_OPEN(path, fmode) : (FILE*)fdopen(fd, fmode); + + if (s->file == NULL) { + return destroy(s), (gzFile)Z_NULL; + } + if (s->mode == 'w') { + /* Write a very simple .gz header: + */ + fprintf(s->file, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0], gz_magic[1], + Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/, OS_CODE); + s->start = 10L; + /* We use 10L instead of ftell(s->file) to because ftell causes an + * fflush on some systems. This version of the library doesn't use + * start anyway in write mode, so this initialization is not + * necessary. + */ + } else { + check_header(s); /* skip the .gz header */ + s->start = ftell(s->file) - s->stream.avail_in; + } + + return (gzFile)s; +} + +/* =========================================================================== + Opens a gzip (.gz) file for reading or writing. +*/ +gzFile ZEXPORT gzopen (path, mode) + const char *path; + const char *mode; +{ + return gz_open (path, mode, -1); +} + +/* =========================================================================== + Associate a gzFile with the file descriptor fd. fd is not dup'ed here + to mimic the behavio(u)r of fdopen. 
+*/ +gzFile ZEXPORT gzdopen (fd, mode) + int fd; + const char *mode; +{ + char name[46]; /* allow for up to 128-bit integers */ + + if (fd < 0) return (gzFile)Z_NULL; + sprintf(name, "", fd); /* for debugging */ + + return gz_open (name, mode, fd); +} + +/* =========================================================================== + * Update the compression level and strategy + */ +int ZEXPORT gzsetparams (file, level, strategy) + gzFile file; + int level; + int strategy; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + /* Make room to allow flushing */ + if (s->stream.avail_out == 0) { + + s->stream.next_out = s->outbuf; + if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) { + s->z_err = Z_ERRNO; + } + s->stream.avail_out = Z_BUFSIZE; + } + + return deflateParams (&(s->stream), level, strategy); +} + +/* =========================================================================== + Read a byte from a gz_stream; update next_in and avail_in. Return EOF + for end of file. + IN assertion: the stream s has been sucessfully opened for reading. +*/ +local int get_byte(s) + gz_stream *s; +{ + if (s->z_eof) return EOF; + if (s->stream.avail_in == 0) { + errno = 0; + s->stream.avail_in = (uInt)fread(s->inbuf, 1, Z_BUFSIZE, s->file); + if (s->stream.avail_in == 0) { + s->z_eof = 1; + if (ferror(s->file)) s->z_err = Z_ERRNO; + return EOF; + } + s->stream.next_in = s->inbuf; + } + s->stream.avail_in--; + return *(s->stream.next_in)++; +} + +/* =========================================================================== + Check the gzip header of a gz_stream opened for reading. Set the stream + mode to transparent if the gzip magic header is not present; set s->err + to Z_DATA_ERROR if the magic header is present but the rest of the header + is incorrect. + IN assertion: the stream s has already been created sucessfully; + s->stream.avail_in is zero for the first time, but may be non-zero + for concatenated .gz files. 
+*/ +local void check_header(s) + gz_stream *s; +{ + int method; /* method byte */ + int flags; /* flags byte */ + uInt len; + int c; + + /* Assure two bytes in the buffer so we can peek ahead -- handle case + where first byte of header is at the end of the buffer after the last + gzip segment */ + len = s->stream.avail_in; + if (len < 2) { + if (len) s->inbuf[0] = s->stream.next_in[0]; + errno = 0; + len = (uInt)fread(s->inbuf + len, 1, Z_BUFSIZE >> len, s->file); + if (len == 0 && ferror(s->file)) s->z_err = Z_ERRNO; + s->stream.avail_in += len; + s->stream.next_in = s->inbuf; + if (s->stream.avail_in < 2) { + s->transparent = s->stream.avail_in; + return; + } + } + + /* Peek ahead to check the gzip magic header */ + if (s->stream.next_in[0] != gz_magic[0] || + s->stream.next_in[1] != gz_magic[1]) { + s->transparent = 1; + return; + } + s->stream.avail_in -= 2; + s->stream.next_in += 2; + + /* Check the rest of the gzip header */ + method = get_byte(s); + flags = get_byte(s); + if (method != Z_DEFLATED || (flags & RESERVED) != 0) { + s->z_err = Z_DATA_ERROR; + return; + } + + /* Discard time, xflags and OS code: */ + for (len = 0; len < 6; len++) (void)get_byte(s); + + if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */ + len = (uInt)get_byte(s); + len += ((uInt)get_byte(s))<<8; + /* len is garbage if EOF but the loop below will quit anyway */ + while (len-- != 0 && get_byte(s) != EOF) ; + } + if ((flags & ORIG_NAME) != 0) { /* skip the original file name */ + while ((c = get_byte(s)) != 0 && c != EOF) ; + } + if ((flags & COMMENT) != 0) { /* skip the .gz file comment */ + while ((c = get_byte(s)) != 0 && c != EOF) ; + } + if ((flags & HEAD_CRC) != 0) { /* skip the header crc */ + for (len = 0; len < 2; len++) (void)get_byte(s); + } + s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK; +} + + /* =========================================================================== + * Cleanup then free the given gz_stream. Return a zlib error code. 
+ Try freeing in the reverse order of allocations. + */ +local int destroy (s) + gz_stream *s; +{ + int err = Z_OK; + + if (!s) return Z_STREAM_ERROR; + + TRYFREE(s->msg); + + if (s->stream.state != NULL) { + if (s->mode == 'w') { +#ifdef NO_GZCOMPRESS + err = Z_STREAM_ERROR; +#else + err = deflateEnd(&(s->stream)); +#endif + } else if (s->mode == 'r') { + err = inflateEnd(&(s->stream)); + } + } + if (s->file != NULL && fclose(s->file)) { +#ifdef ESPIPE + if (errno != ESPIPE) /* fclose is broken for pipes in HP/UX */ +#endif + err = Z_ERRNO; + } + if (s->z_err < 0) err = s->z_err; + + TRYFREE(s->inbuf); + TRYFREE(s->outbuf); + TRYFREE(s->path); + TRYFREE(s); + return err; +} + +/* =========================================================================== + Reads the given number of uncompressed bytes from the compressed file. + gzread returns the number of bytes actually read (0 for end of file). +*/ +int ZEXPORT gzread (file, buf, len) + gzFile file; + voidp buf; + unsigned len; +{ + gz_stream *s = (gz_stream*)file; + Bytef *start = (Bytef*)buf; /* starting point for crc computation */ + Byte *next_out; /* == stream.next_out but not forced far (for MSDOS) */ + + if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR; + + if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1; + if (s->z_err == Z_STREAM_END) return 0; /* EOF */ + + next_out = (Byte*)buf; + s->stream.next_out = (Bytef*)buf; + s->stream.avail_out = len; + + if (s->stream.avail_out && s->back != EOF) { + *next_out++ = s->back; + s->stream.next_out++; + s->stream.avail_out--; + s->back = EOF; + s->out++; + start++; + if (s->last) { + s->z_err = Z_STREAM_END; + return 1; + } + } + + while (s->stream.avail_out != 0) { + + if (s->transparent) { + /* Copy first the lookahead bytes: */ + uInt n = s->stream.avail_in; + if (n > s->stream.avail_out) n = s->stream.avail_out; + if (n > 0) { + zmemcpy(s->stream.next_out, s->stream.next_in, n); + next_out += n; + s->stream.next_out = next_out; + 
s->stream.next_in += n; + s->stream.avail_out -= n; + s->stream.avail_in -= n; + } + if (s->stream.avail_out > 0) { + s->stream.avail_out -= + (uInt)fread(next_out, 1, s->stream.avail_out, s->file); + } + len -= s->stream.avail_out; + s->in += len; + s->out += len; + if (len == 0) s->z_eof = 1; + return (int)len; + } + if (s->stream.avail_in == 0 && !s->z_eof) { + + errno = 0; + s->stream.avail_in = (uInt)fread(s->inbuf, 1, Z_BUFSIZE, s->file); + if (s->stream.avail_in == 0) { + s->z_eof = 1; + if (ferror(s->file)) { + s->z_err = Z_ERRNO; + break; + } + } + s->stream.next_in = s->inbuf; + } + s->in += s->stream.avail_in; + s->out += s->stream.avail_out; + s->z_err = inflate(&(s->stream), Z_NO_FLUSH); + s->in -= s->stream.avail_in; + s->out -= s->stream.avail_out; + + if (s->z_err == Z_STREAM_END) { + /* Check CRC and original size */ + s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); + start = s->stream.next_out; + + if (getLong(s) != s->crc) { + s->z_err = Z_DATA_ERROR; + } else { + (void)getLong(s); + /* The uncompressed length returned by above getlong() may be + * different from s->out in case of concatenated .gz files. + * Check for such files: + */ + check_header(s); + if (s->z_err == Z_OK) { + inflateReset(&(s->stream)); + s->crc = crc32(0L, Z_NULL, 0); + } + } + } + if (s->z_err != Z_OK || s->z_eof) break; + } + s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); + + if (len == s->stream.avail_out && + (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO)) + return -1; + return (int)(len - s->stream.avail_out); +} + + +/* =========================================================================== + Reads one byte from the compressed file. gzgetc returns this byte + or -1 in case of end of file or error. +*/ +int ZEXPORT gzgetc(file) + gzFile file; +{ + unsigned char c; + + return gzread(file, &c, 1) == 1 ? 
c : -1; +} + + +/* =========================================================================== + Push one byte back onto the stream. +*/ +int ZEXPORT gzungetc(c, file) + int c; + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'r' || c == EOF || s->back != EOF) return EOF; + s->back = c; + s->out--; + s->last = (s->z_err == Z_STREAM_END); + if (s->last) s->z_err = Z_OK; + s->z_eof = 0; + return c; +} + + +/* =========================================================================== + Reads bytes from the compressed file until len-1 characters are + read, or a newline character is read and transferred to buf, or an + end-of-file condition is encountered. The string is then terminated + with a null character. + gzgets returns buf, or Z_NULL in case of error. + + The current implementation is not optimized at all. +*/ +char * ZEXPORT gzgets(file, buf, len) + gzFile file; + char *buf; + int len; +{ + char *b = buf; + if (buf == Z_NULL || len <= 0) return Z_NULL; + + while (--len > 0 && gzread(file, buf, 1) == 1 && *buf++ != '\n') ; + *buf = '\0'; + return b == buf && len > 0 ? Z_NULL : b; +} + + +#ifndef NO_GZCOMPRESS +/* =========================================================================== + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of bytes actually written (0 in case of error). 
+*/ +int ZEXPORT gzwrite (file, buf, len) + gzFile file; + voidpc buf; + unsigned len; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + s->stream.next_in = (Bytef*)buf; + s->stream.avail_in = len; + + while (s->stream.avail_in != 0) { + + if (s->stream.avail_out == 0) { + + s->stream.next_out = s->outbuf; + if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) { + s->z_err = Z_ERRNO; + break; + } + s->stream.avail_out = Z_BUFSIZE; + } + s->in += s->stream.avail_in; + s->out += s->stream.avail_out; + s->z_err = deflate(&(s->stream), Z_NO_FLUSH); + s->in -= s->stream.avail_in; + s->out -= s->stream.avail_out; + if (s->z_err != Z_OK) break; + } + s->crc = crc32(s->crc, (const Bytef *)buf, len); + + return (int)(len - s->stream.avail_in); +} + + +/* =========================================================================== + Converts, formats, and writes the args to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written (0 in case of error). +*/ +#ifdef STDC +#include + +int ZEXPORTVA gzprintf (gzFile file, const char *format, /* args */ ...) 
+{ + char buf[Z_PRINTF_BUFSIZE]; + va_list va; + int len; + + buf[sizeof(buf) - 1] = 0; + va_start(va, format); +#ifdef NO_vsnprintf +# ifdef HAS_vsprintf_void + (void)vsprintf(buf, format, va); + va_end(va); + for (len = 0; len < sizeof(buf); len++) + if (buf[len] == 0) break; +# else + len = vsprintf(buf, format, va); + va_end(va); +# endif +#else +# ifdef HAS_vsnprintf_void + (void)vsnprintf(buf, sizeof(buf), format, va); + va_end(va); + len = strlen(buf); +# else + len = vsnprintf(buf, sizeof(buf), format, va); + va_end(va); +# endif +#endif + if (len <= 0 || len >= (int)sizeof(buf) || buf[sizeof(buf) - 1] != 0) + return 0; + return gzwrite(file, buf, (unsigned)len); +} +#else /* not ANSI C */ + +int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20) + gzFile file; + const char *format; + int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20; +{ + char buf[Z_PRINTF_BUFSIZE]; + int len; + + buf[sizeof(buf) - 1] = 0; +#ifdef NO_snprintf +# ifdef HAS_sprintf_void + sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); + for (len = 0; len < sizeof(buf); len++) + if (buf[len] == 0) break; +# else + len = sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#else +# ifdef HAS_snprintf_void + snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); + len = strlen(buf); +# else + len = snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#endif + if (len <= 0 || len >= sizeof(buf) || buf[sizeof(buf) - 1] != 0) + return 0; + return gzwrite(file, buf, len); +} +#endif + +/* =========================================================================== + Writes c, converted 
to an unsigned char, into the compressed file. + gzputc returns the value that was written, or -1 in case of error. +*/ +int ZEXPORT gzputc(file, c) + gzFile file; + int c; +{ + unsigned char cc = (unsigned char) c; /* required for big endian systems */ + + return gzwrite(file, &cc, 1) == 1 ? (int)cc : -1; +} + + +/* =========================================================================== + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + gzputs returns the number of characters written, or -1 in case of error. +*/ +int ZEXPORT gzputs(file, s) + gzFile file; + const char *s; +{ + return gzwrite(file, (char*)s, (unsigned)strlen(s)); +} + + +/* =========================================================================== + Flushes all pending output into the compressed file. The parameter + flush is as in the deflate() function. +*/ +local int do_flush (file, flush) + gzFile file; + int flush; +{ + uInt len; + int done = 0; + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + s->stream.avail_in = 0; /* should be zero already anyway */ + + for (;;) { + len = Z_BUFSIZE - s->stream.avail_out; + + if (len != 0) { + if ((uInt)fwrite(s->outbuf, 1, len, s->file) != len) { + s->z_err = Z_ERRNO; + return Z_ERRNO; + } + s->stream.next_out = s->outbuf; + s->stream.avail_out = Z_BUFSIZE; + } + if (done) break; + s->out += s->stream.avail_out; + s->z_err = deflate(&(s->stream), flush); + s->out -= s->stream.avail_out; + + /* Ignore the second of two consecutive flushes: */ + if (len == 0 && s->z_err == Z_BUF_ERROR) s->z_err = Z_OK; + + /* deflate has finished flushing only when it hasn't used up + * all the available space in the output buffer: + */ + done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END); + + if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break; + } + return s->z_err == Z_STREAM_END ? 
Z_OK : s->z_err; +} + +int ZEXPORT gzflush (file, flush) + gzFile file; + int flush; +{ + gz_stream *s = (gz_stream*)file; + int err = do_flush (file, flush); + + if (err) return err; + fflush(s->file); + return s->z_err == Z_STREAM_END ? Z_OK : s->z_err; +} +#endif /* NO_GZCOMPRESS */ + +/* =========================================================================== + Sets the starting position for the next gzread or gzwrite on the given + compressed file. The offset represents a number of bytes in the + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error. + SEEK_END is not implemented, returns error. + In this version of the library, gzseek can be extremely slow. +*/ +z_off_t ZEXPORT gzseek (file, offset, whence) + gzFile file; + z_off_t offset; + int whence; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || whence == SEEK_END || + s->z_err == Z_ERRNO || s->z_err == Z_DATA_ERROR) { + return -1L; + } + + if (s->mode == 'w') { +#ifdef NO_GZCOMPRESS + return -1L; +#else + if (whence == SEEK_SET) { + offset -= s->in; + } + if (offset < 0) return -1L; + + /* At this point, offset is the number of zero bytes to write. 
*/ + if (s->inbuf == Z_NULL) { + s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); /* for seeking */ + if (s->inbuf == Z_NULL) return -1L; + zmemzero(s->inbuf, Z_BUFSIZE); + } + while (offset > 0) { + uInt size = Z_BUFSIZE; + if (offset < Z_BUFSIZE) size = (uInt)offset; + + size = gzwrite(file, s->inbuf, size); + if (size == 0) return -1L; + + offset -= size; + } + return s->in; +#endif + } + /* Rest of function is for reading only */ + + /* compute absolute position */ + if (whence == SEEK_CUR) { + offset += s->out; + } + if (offset < 0) return -1L; + + if (s->transparent) { + /* map to fseek */ + s->back = EOF; + s->stream.avail_in = 0; + s->stream.next_in = s->inbuf; + if (fseek(s->file, offset, SEEK_SET) < 0) return -1L; + + s->in = s->out = offset; + return offset; + } + + /* For a negative seek, rewind and use positive seek */ + if (offset >= s->out) { + offset -= s->out; + } else if (gzrewind(file) < 0) { + return -1L; + } + /* offset is now the number of bytes to skip. */ + + if (offset != 0 && s->outbuf == Z_NULL) { + s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); + if (s->outbuf == Z_NULL) return -1L; + } + if (offset && s->back != EOF) { + s->back = EOF; + s->out++; + offset--; + if (s->last) s->z_err = Z_STREAM_END; + } + while (offset > 0) { + int size = Z_BUFSIZE; + if (offset < Z_BUFSIZE) size = (int)offset; + + size = gzread(file, s->outbuf, (uInt)size); + if (size <= 0) return -1L; + offset -= size; + } + return s->out; +} + +/* =========================================================================== + Rewinds input file. 
+*/ +int ZEXPORT gzrewind (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'r') return -1; + + s->z_err = Z_OK; + s->z_eof = 0; + s->back = EOF; + s->stream.avail_in = 0; + s->stream.next_in = s->inbuf; + s->crc = crc32(0L, Z_NULL, 0); + if (!s->transparent) (void)inflateReset(&s->stream); + s->in = 0; + s->out = 0; + return fseek(s->file, s->start, SEEK_SET); +} + +/* =========================================================================== + Returns the starting position for the next gzread or gzwrite on the + given compressed file. This position represents a number of bytes in the + uncompressed data stream. +*/ +z_off_t ZEXPORT gztell (file) + gzFile file; +{ + return gzseek(file, 0L, SEEK_CUR); +} + +/* =========================================================================== + Returns 1 when EOF has previously been detected reading the given + input stream, otherwise zero. +*/ +int ZEXPORT gzeof (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + /* With concatenated compressed files that can have embedded + * crc trailers, z_eof is no longer the only/best indicator of EOF + * on a gz_stream. Handle end-of-stream error explicitly here. + */ + if (s == NULL || s->mode != 'r') return 0; + if (s->z_eof) return 1; + return s->z_err == Z_STREAM_END; +} + +/* =========================================================================== + Returns 1 if reading and doing so transparently, otherwise zero. 
+*/ +int ZEXPORT gzdirect (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'r') return 0; + return s->transparent; +} + +/* =========================================================================== + Outputs a long in LSB order to the given file +*/ +local void putLong (file, x) + FILE *file; + uLong x; +{ + int n; + for (n = 0; n < 4; n++) { + fputc((int)(x & 0xff), file); + x >>= 8; + } +} + +/* =========================================================================== + Reads a long in LSB order from the given gz_stream. Sets z_err in case + of error. +*/ +local uLong getLong (s) + gz_stream *s; +{ + uLong x = (uLong)get_byte(s); + int c; + + x += ((uLong)get_byte(s))<<8; + x += ((uLong)get_byte(s))<<16; + c = get_byte(s); + if (c == EOF) s->z_err = Z_DATA_ERROR; + x += ((uLong)c)<<24; + return x; +} + +/* =========================================================================== + Flushes all pending output if necessary, closes the compressed file + and deallocates all the (de)compression state. +*/ +int ZEXPORT gzclose (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL) return Z_STREAM_ERROR; + + if (s->mode == 'w') { +#ifdef NO_GZCOMPRESS + return Z_STREAM_ERROR; +#else + if (do_flush (file, Z_FINISH) != Z_OK) + return destroy((gz_stream*)file); + + putLong (s->file, s->crc); + putLong (s->file, (uLong)(s->in & 0xffffffff)); +#endif + } + return destroy((gz_stream*)file); +} + +#ifdef STDC +# define zstrerror(errnum) strerror(errnum) +#else +# define zstrerror(errnum) "" +#endif + +/* =========================================================================== + Returns the error message for the last error which occurred on the + given compressed file. errnum is set to zlib error number. If an + error occurred in the file system and not in the compression library, + errnum is set to Z_ERRNO and the application may consult errno + to get the exact error code. 
+*/ +const char * ZEXPORT gzerror (file, errnum) + gzFile file; + int *errnum; +{ + char *m; + gz_stream *s = (gz_stream*)file; + + if (s == NULL) { + *errnum = Z_STREAM_ERROR; + return (const char*)ERR_MSG(Z_STREAM_ERROR); + } + *errnum = s->z_err; + if (*errnum == Z_OK) return (const char*)""; + + m = (char*)(*errnum == Z_ERRNO ? zstrerror(errno) : s->stream.msg); + + if (m == NULL || *m == '\0') m = (char*)ERR_MSG(s->z_err); + + TRYFREE(s->msg); + s->msg = (char*)ALLOC(strlen(s->path) + strlen(m) + 3); + if (s->msg == Z_NULL) return (const char*)ERR_MSG(Z_MEM_ERROR); + strcpy(s->msg, s->path); + strcat(s->msg, ": "); + strcat(s->msg, m); + return (const char*)s->msg; +} + +/* =========================================================================== + Clear the error and end-of-file flags, and do the same for the real file. +*/ +void ZEXPORT gzclearerr (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL) return; + if (s->z_err != Z_STREAM_END) s->z_err = Z_OK; + s->z_eof = 0; + clearerr(s->file); +} Added: external/zlib/infback.c ============================================================================== --- (empty file) +++ external/zlib/infback.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,623 @@ +/* infback.c -- inflate using a call-back interface + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + This code is largely copied from inflate.c. Normally either infback.o or + inflate.o would be linked into an application--not both. The interface + with inffast.c is retained so that optimized assembler-coded versions of + inflate_fast() can be used with either inflate.c or infback.c. 
+ */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +/* function prototypes */ +local void fixedtables OF((struct inflate_state FAR *state)); + +/* + strm provides memory allocation functions in zalloc and zfree, or + Z_NULL to use the library memory allocation functions. + + windowBits is in the range 8..15, and window is a user-supplied + window and output buffer that is 2**windowBits bytes. + */ +int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size) +z_streamp strm; +int windowBits; +unsigned char FAR *window; +const char *version; +int stream_size; +{ + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL || window == Z_NULL || + windowBits < 8 || windowBits > 15) + return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; + } + if (strm->zfree == (free_func)0) strm->zfree = zcfree; + state = (struct inflate_state FAR *)ZALLOC(strm, 1, + sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->dmax = 32768U; + state->wbits = windowBits; + state->wsize = 1U << windowBits; + state->window = window; + state->write = 0; + state->whave = 0; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. 
However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +/* Macros for inflateBack(): */ + +/* Load returned state from inflate_fast() */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Set state from registers for inflate_fast() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Assure that some input is available. If input is requested, but denied, + then return a Z_BUF_ERROR from inflateBack(). 
*/ +#define PULL() \ + do { \ + if (have == 0) { \ + have = in(in_desc, &next); \ + if (have == 0) { \ + next = Z_NULL; \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflateBack() + with an error if there is no input available. */ +#define PULLBYTE() \ + do { \ + PULL(); \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflateBack() with + an error. */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* Assure that some output space is available, by writing out the window + if it's full. If the write fails, return from inflateBack() with a + Z_BUF_ERROR. */ +#define ROOM() \ + do { \ + if (left == 0) { \ + put = state->window; \ + left = state->wsize; \ + state->whave = left; \ + if (out(out_desc, put, left)) { \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* + strm provides the memory allocation functions and window buffer on input, + and provides information on the unused input on return. For Z_DATA_ERROR + returns, strm will also provide an error message. + + in() and out() are the call-back input and output functions. When + inflateBack() needs more input, it calls in(). 
When inflateBack() has + filled the window with output, or when it completes with data in the + window, it calls out() to write out the data. The application must not + change the provided input until in() is called again or inflateBack() + returns. The application must not change the window/output buffer until + inflateBack() returns. + + in() and out() are called with a descriptor parameter provided in the + inflateBack() call. This parameter can be a structure that provides the + information required to do the read or write, as well as accumulated + information on the input and output such as totals and check values. + + in() should return zero on failure. out() should return non-zero on + failure. If either in() or out() fails, then inflateBack() returns a + Z_BUF_ERROR. strm->next_in can be checked for Z_NULL to see whether it + was in() or out() that caused the error. Otherwise, inflateBack() + returns Z_STREAM_END on success, Z_DATA_ERROR for a deflate format + error, or Z_MEM_ERROR if it could not allocate memory for the state. + inflateBack() can also return Z_STREAM_ERROR if the input parameters + are not correct, i.e. strm is Z_NULL or the state was not initialized. 
+ */ +int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc) +z_streamp strm; +in_func in; +void FAR *in_desc; +out_func out; +void FAR *out_desc; +{ + struct inflate_state FAR *state; + unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code this; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* Check that the strm exists and that the state was initialized */ + if (strm == Z_NULL || strm->state == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* Reset the state */ + strm->msg = Z_NULL; + state->mode = TYPE; + state->last = 0; + state->whave = 0; + next = strm->next_in; + have = next != Z_NULL ? strm->avail_in : 0; + hold = 0; + bits = 0; + put = state->window; + left = state->wsize; + + /* Inflate until end of block marked as last */ + for (;;) + switch (state->mode) { + case TYPE: + /* determine and dispatch block type */ + if (state->last) { + BYTEBITS(); + state->mode = DONE; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? 
" (last)" : "")); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? " (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + + case STORED: + /* get and verify stored block length */ + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + + /* copy stored block from input to output */ + while (state->length != 0) { + copy = state->length; + PULL(); + ROOM(); + if (copy > have) copy = have; + if (copy > left) copy = left; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + + case TABLE: + /* get dynamic table entries descriptor */ + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + + /* get code length code lengths (not a typo) */ + state->have = 0; + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), 
state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + + /* get length and distance code code lengths */ + state->have = 0; + while (state->have < state->nlen + state->ndist) { + for (;;) { + this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.val < 16) { + NEEDBITS(this.bits); + DROPBITS(this.bits); + state->lens[state->have++] = this.val; + } + else { + if (this.val == 16) { + NEEDBITS(this.bits + 2); + DROPBITS(this.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = (unsigned)(state->lens[state->have - 1]); + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (this.val == 17) { + NEEDBITS(this.bits + 3); + DROPBITS(this.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(this.bits + 7); + DROPBITS(this.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* build code tables */ + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: 
codes ok\n")); + state->mode = LEN; + + case LEN: + /* use inflate_fast() if we have enough input and output */ + if (have >= 6 && left >= 258) { + RESTORE(); + if (state->whave < state->wsize) + state->whave = state->wsize - left; + inflate_fast(strm, state->wsize); + LOAD(); + break; + } + + /* get a literal, length, or end-of-block code */ + for (;;) { + this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.op && (this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(this.bits); + state->length = (unsigned)this.val; + + /* process literal */ + if (this.op == 0) { + Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", this.val)); + ROOM(); + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + } + + /* process end of block */ + if (this.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + + /* invalid code */ + if (this.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + + /* length code -- get extra bits, if any */ + state->extra = (unsigned)(this.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + + /* get distance code */ + for (;;) { + this = state->distcode[BITS(state->distbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if ((this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + 
DROPBITS(this.bits); + if (this.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)this.val; + + /* get distance extra bits, if any */ + state->extra = (unsigned)(this.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } + if (state->offset > state->wsize - (state->whave < state->wsize ? + left : 0)) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + + /* copy match from window to output */ + do { + ROOM(); + copy = state->wsize - state->offset; + if (copy < left) { + from = put + copy; + copy = left - copy; + } + else { + from = put - state->offset; + copy = left; + } + if (copy > state->length) copy = state->length; + state->length -= copy; + left -= copy; + do { + *put++ = *from++; + } while (--copy); + } while (state->length != 0); + break; + + case DONE: + /* inflate stream terminated properly -- write leftover output */ + ret = Z_STREAM_END; + if (left < state->wsize) { + if (out(out_desc, state->window, state->wsize - left)) + ret = Z_BUF_ERROR; + } + goto inf_leave; + + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + + default: /* can't happen, but makes compilers happy */ + ret = Z_STREAM_ERROR; + goto inf_leave; + } + + /* Return unused input */ + inf_leave: + strm->next_in = next; + strm->avail_in = have; + return ret; +} + +int ZEXPORT inflateBackEnd(strm) +z_streamp strm; +{ + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} Added: external/zlib/inffast.c ============================================================================== --- (empty file) +++ external/zlib/inffast.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,318 @@ +/* inffast.c -- fast 
decoding + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifndef ASMINF + +/* Allow machine dependent optimization for post-increment or pre-increment. + Based on testing to date, + Pre-increment preferred for: + - PowerPC G3 (Adler) + - MIPS R5000 (Randers-Pehrson) + Post-increment preferred for: + - none + No measurable difference: + - Pentium III (Anderson) + - M68060 (Nikl) + */ +#ifdef POSTINC +# define OFF 0 +# define PUP(a) *(a)++ +#else +# define OFF 1 +# define PUP(a) *++(a) +#endif + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. 
inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. + */ +void inflate_fast(strm, start) +z_streamp strm; +unsigned start; /* inflate()'s starting value for strm->avail_out */ +{ + struct inflate_state FAR *state; + unsigned char FAR *in; /* local strm->next_in */ + unsigned char FAR *last; /* while in < last, enough input available */ + unsigned char FAR *out; /* local strm->next_out */ + unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ + unsigned char FAR *end; /* while out < end, enough space available */ +#ifdef INFLATE_STRICT + unsigned dmax; /* maximum distance from zlib header */ +#endif + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned write; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ + unsigned long hold; /* local strm->hold */ + unsigned bits; /* local strm->bits */ + code const FAR *lcode; /* local strm->lencode */ + code const FAR *dcode; /* local strm->distcode */ + unsigned lmask; /* mask for first level of length codes */ + unsigned dmask; /* mask for first level of distance codes */ + code this; /* retrieved table entry */ + unsigned op; /* code bits, operation, extra bits, or */ + /* window position, window bytes to copy */ + unsigned len; /* match length, unused bytes */ + unsigned dist; /* match distance */ + unsigned char FAR *from; /* where to copy match from */ + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + in = strm->next_in - OFF; + last = in + (strm->avail_in - 5); + out = strm->next_out - OFF; + beg = out - (start - strm->avail_out); + end = out + (strm->avail_out - 257); +#ifdef INFLATE_STRICT + dmax = state->dmax; +#endif + wsize = state->wsize; + whave = state->whave; + write = state->write; + window = state->window; + hold = state->hold; + bits = state->bits; + lcode = state->lencode; + dcode = 
state->distcode; + lmask = (1U << state->lenbits) - 1; + dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + do { + if (bits < 15) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + this = lcode[hold & lmask]; + dolen: + op = (unsigned)(this.bits); + hold >>= op; + bits -= op; + op = (unsigned)(this.op); + if (op == 0) { /* literal */ + Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", this.val)); + PUP(out) = (unsigned char)(this.val); + } + else if (op & 16) { /* length base */ + len = (unsigned)(this.val); + op &= 15; /* number of extra bits */ + if (op) { + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + len += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + } + Tracevv((stderr, "inflate: length %u\n", len)); + if (bits < 15) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + this = dcode[hold & dmask]; + dodist: + op = (unsigned)(this.bits); + hold >>= op; + bits -= op; + op = (unsigned)(this.op); + if (op & 16) { /* distance base */ + dist = (unsigned)(this.val); + op &= 15; /* number of extra bits */ + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + } + dist += (unsigned)hold & ((1U << op) - 1); +#ifdef INFLATE_STRICT + if (dist > dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + hold >>= op; + bits -= op; + Tracevv((stderr, "inflate: distance %u\n", dist)); + op = (unsigned)(out - beg); /* max distance in output */ + if (dist > op) { /* see if copy from window */ + op = dist - op; /* distance back in window */ + if (op > whave) { + strm->msg = 
(char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + from = window - OFF; + if (write == 0) { /* very common case */ + from += wsize - op; + if (op < len) { /* some from window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + else if (write < op) { /* wrap around window */ + from += wsize + write - op; + op -= write; + if (op < len) { /* some from end of window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = window - OFF; + if (write < len) { /* some from start of window */ + op = write; + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + } + else { /* contiguous in window */ + from += write - op; + if (op < len) { /* some from window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + while (len > 2) { + PUP(out) = PUP(from); + PUP(out) = PUP(from); + PUP(out) = PUP(from); + len -= 3; + } + if (len) { + PUP(out) = PUP(from); + if (len > 1) + PUP(out) = PUP(from); + } + } + else { + from = out - dist; /* copy direct from output */ + do { /* minimum length is three */ + PUP(out) = PUP(from); + PUP(out) = PUP(from); + PUP(out) = PUP(from); + len -= 3; + } while (len > 2); + if (len) { + PUP(out) = PUP(from); + if (len > 1) + PUP(out) = PUP(from); + } + } + } + else if ((op & 64) == 0) { /* 2nd level distance code */ + this = dcode[this.val + (hold & ((1U << op) - 1))]; + goto dodist; + } + else { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + } + else if ((op & 64) == 0) { /* 2nd level length code */ + this = lcode[this.val + (hold & ((1U << op) - 1))]; + goto dolen; + } + else if (op & 32) { /* end-of-block */ + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + else { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + 
} + } while (in < last && out < end); + + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + len = bits >> 3; + in -= len; + bits -= len << 3; + hold &= (1U << bits) - 1; + + /* update state and return */ + strm->next_in = in + OFF; + strm->next_out = out + OFF; + strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); + strm->avail_out = (unsigned)(out < end ? + 257 + (end - out) : 257 - (out - end)); + state->hold = hold; + state->bits = bits; + return; +} + +/* + inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): + - Using bit fields for code structure + - Different op definition to avoid & for extra bits (do & for table bits) + - Three separate decoding do-loops for direct, window, and write == 0 + - Special case for distance > 1 copies to do overlapped load and store copy + - Explicit branch predictions (based on measured branch probabilities) + - Deferring match copy and interspersing it with decoding subsequent codes + - Swapping literal/length else + - Swapping window/direct else + - Larger unrolled copy loops (three is about right) + - Moving len -= 3 statement into middle of loop + */ + +#endif /* !ASMINF */ Added: external/zlib/inffast.h ============================================================================== --- (empty file) +++ external/zlib/inffast.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,11 @@ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-2003 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. 
+ */ + +void inflate_fast OF((z_streamp strm, unsigned start)); Added: external/zlib/inffixed.h ============================================================================== --- (empty file) +++ external/zlib/inffixed.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,94 @@ + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). + */ + + /* WARNING: this file should *not* be used by applications. It + is part of the implementation of the compression library and + is subject to change. Applications should only use zlib.h. + */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + 
{0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + 
{16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; + + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + 
{19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; Added: external/zlib/inflate.c ============================================================================== --- (empty file) +++ external/zlib/inflate.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,1368 @@ +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * Change history: + * + * 1.2.beta0 24 Nov 2002 + * - First version -- complete rewrite of inflate to simplify code, avoid + * creation of window when not needed, minimize use of window when it is + * needed, make inffast.c even faster, implement gzip decoding, and to + * improve code readability and style over the previous zlib inflate code + * + * 1.2.beta1 25 Nov 2002 + * - Use pointers for available input and output checking in inffast.c + * - Remove input and output counters in inffast.c + * - Change inffast.c entry and loop from avail_in >= 7 to >= 6 + * - Remove unnecessary second byte pull from length extra in inffast.c + * - Unroll direct copy to three copies per loop in inffast.c + * + * 1.2.beta2 4 Dec 2002 + * - Change external routine names to reduce potential conflicts + * - Correct filename to inffixed.h for fixed tables in inflate.c + * - Make hbuf[] unsigned char to match parameter type in inflate.c + * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset) + * to avoid negation problem on Alphas (64 bit) in inflate.c + * + * 1.2.beta3 22 Dec 2002 + * - Add comments on state->bits assertion in inffast.c + * - Add comments on op field in inftrees.h + * - Fix bug in reuse of allocated window after inflateReset() + * - Remove bit fields--back to byte structure for speed + * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths + * - Change post-increments to pre-increments in inflate_fast(), 
PPC biased? + * - Add compile time option, POSTINC, to use post-increments instead (Intel?) + * - Make MATCH copy in inflate() much faster for when inflate_fast() not used + * - Use local copies of stream next and avail values, as well as local bit + * buffer and bit count in inflate()--for speed when inflate_fast() not used + * + * 1.2.beta4 1 Jan 2003 + * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings + * - Move a comment on output buffer sizes from inffast.c to inflate.c + * - Add comments in inffast.c to introduce the inflate_fast() routine + * - Rearrange window copies in inflate_fast() for speed and simplification + * - Unroll last copy for window match in inflate_fast() + * - Use local copies of window variables in inflate_fast() for speed + * - Pull out common write == 0 case for speed in inflate_fast() + * - Make op and len in inflate_fast() unsigned for consistency + * - Add FAR to lcode and dcode declarations in inflate_fast() + * - Simplified bad distance check in inflate_fast() + * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new + * source file infback.c to provide a call-back interface to inflate for + * programs like gzip and unzip -- uses window as output buffer to avoid + * window copying + * + * 1.2.beta5 1 Jan 2003 + * - Improved inflateBack() interface to allow the caller to provide initial + * input in strm. 
+ * - Fixed stored blocks bug in inflateBack() + * + * 1.2.beta6 4 Jan 2003 + * - Added comments in inffast.c on effectiveness of POSTINC + * - Typecasting all around to reduce compiler warnings + * - Changed loops from while (1) or do {} while (1) to for (;;), again to + * make compilers happy + * - Changed type of window in inflateBackInit() to unsigned char * + * + * 1.2.beta7 27 Jan 2003 + * - Changed many types to unsigned or unsigned short to avoid warnings + * - Added inflateCopy() function + * + * 1.2.0 9 Mar 2003 + * - Changed inflateBack() interface to provide separate opaque descriptors + * for the in() and out() functions + * - Changed inflateBack() argument and in_func typedef to swap the length + * and buffer address return values for the input function + * - Check next_in and next_out for Z_NULL on entry to inflate() + * + * The history for versions after 1.2.0 is in ChangeLog in the zlib distribution. + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifdef MAKEFIXED +# ifndef BUILDFIXED +# define BUILDFIXED +# endif +#endif + +/* function prototypes */ +local void fixedtables OF((struct inflate_state FAR *state)); +local int updatewindow OF((z_streamp strm, unsigned out)); +#ifdef BUILDFIXED + void makefixed OF((void)); +#endif +local unsigned syncsearch OF((unsigned FAR *have, unsigned char FAR *buf, + unsigned len)); + +int ZEXPORT inflateReset(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + strm->total_in = strm->total_out = state->total = 0; + strm->msg = Z_NULL; + strm->adler = 1; /* to support ill-conceived Java test suite */ + state->mode = HEAD; + state->last = 0; + state->havedict = 0; + state->dmax = 32768U; + state->head = Z_NULL; + state->wsize = 0; + state->whave = 0; + state->write = 0; + state->hold = 0; + state->bits = 0; + state->lencode = 
state->distcode = state->next = state->codes; + Tracev((stderr, "inflate: reset\n")); + return Z_OK; +} + +int ZEXPORT inflatePrime(strm, bits, value) +z_streamp strm; +int bits; +int value; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR; + value &= (1L << bits) - 1; + state->hold += value << state->bits; + state->bits += bits; + return Z_OK; +} + +int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size) +z_streamp strm; +int windowBits; +const char *version; +int stream_size; +{ + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL) return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; + } + if (strm->zfree == (free_func)0) strm->zfree = zcfree; + state = (struct inflate_state FAR *) + ZALLOC(strm, 1, sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + if (windowBits < 0) { + state->wrap = 0; + windowBits = -windowBits; + } + else { + state->wrap = (windowBits >> 4) + 1; +#ifdef GUNZIP + if (windowBits < 48) windowBits &= 15; +#endif + } + if (windowBits < 8 || windowBits > 15) { + ZFREE(strm, state); + strm->state = Z_NULL; + return Z_STREAM_ERROR; + } + state->wbits = (unsigned)windowBits; + state->window = Z_NULL; + return inflateReset(strm); +} + +int ZEXPORT inflateInit_(strm, version, stream_size) +z_streamp strm; +const char *version; +int stream_size; +{ + return inflateInit2_(strm, DEF_WBITS, version, stream_size); +} + +/* + Return state with length and distance decoding tables and index sizes set to + 
fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +#ifdef MAKEFIXED +#include <stdio.h> + +/* + Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also + defines BUILDFIXED, so the tables are built on the fly. makefixed() writes + those tables to stdout, which would be piped to inffixed.h. 
A small program + can simply call makefixed to do this: + + void makefixed(void); + + int main(void) + { + makefixed(); + return 0; + } + + Then that can be linked with zlib built with MAKEFIXED defined and run: + + a.out > inffixed.h + */ +void makefixed() +{ + unsigned low, size; + struct inflate_state state; + + fixedtables(&state); + puts(" /* inffixed.h -- table for decoding fixed codes"); + puts(" * Generated automatically by makefixed()."); + puts(" */"); + puts(""); + puts(" /* WARNING: this file should *not* be used by applications."); + puts(" It is part of the implementation of this library and is"); + puts(" subject to change. Applications should only use zlib.h."); + puts(" */"); + puts(""); + size = 1U << 9; + printf(" static const code lenfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 7) == 0) printf("\n "); + printf("{%u,%u,%d}", state.lencode[low].op, state.lencode[low].bits, + state.lencode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); + size = 1U << 5; + printf("\n static const code distfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 6) == 0) printf("\n "); + printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits, + state.distcode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); +} +#endif /* MAKEFIXED */ + +/* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. This is only called + when a window is already in use, or when output has been written during this + inflate call, but the end of the deflate stream has not been reached yet. + It is also called to create a window for dictionary data when a dictionary + is loaded. 
+ + Providing output buffers larger than 32K to inflate() should provide a speed + advantage, since only the last 32K of output is copied to the sliding window + upon return from inflate(), and since all distances after the first 32K of + output will fall in the output data, making match copies simpler and faster. + The advantage may be dependent on the size of the processor's data caches. + */ +local int updatewindow(strm, out) +z_streamp strm; +unsigned out; +{ + struct inflate_state FAR *state; + unsigned copy, dist; + + state = (struct inflate_state FAR *)strm->state; + + /* if it hasn't been done already, allocate space for the window */ + if (state->window == Z_NULL) { + state->window = (unsigned char FAR *) + ZALLOC(strm, 1U << state->wbits, + sizeof(unsigned char)); + if (state->window == Z_NULL) return 1; + } + + /* if window not in use yet, initialize */ + if (state->wsize == 0) { + state->wsize = 1U << state->wbits; + state->write = 0; + state->whave = 0; + } + + /* copy state->wsize or less output bytes into the circular window */ + copy = out - strm->avail_out; + if (copy >= state->wsize) { + zmemcpy(state->window, strm->next_out - state->wsize, state->wsize); + state->write = 0; + state->whave = state->wsize; + } + else { + dist = state->wsize - state->write; + if (dist > copy) dist = copy; + zmemcpy(state->window + state->write, strm->next_out - copy, dist); + copy -= dist; + if (copy) { + zmemcpy(state->window, strm->next_out - copy, copy); + state->write = copy; + state->whave = state->wsize; + } + else { + state->write += dist; + if (state->write == state->wsize) state->write = 0; + if (state->whave < state->wsize) state->whave += dist; + } + } + return 0; +} + +/* Macros for inflate(): */ + +/* check function to use adler32() for zlib or crc32() for gzip */ +#ifdef GUNZIP +# define UPDATE(check, buf, len) \ + (state->flags ? 
crc32(check, buf, len) : adler32(check, buf, len)) +#else +# define UPDATE(check, buf, len) adler32(check, buf, len) +#endif + +/* check macros for header crc */ +#ifdef GUNZIP +# define CRC2(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + check = crc32(check, hbuf, 2); \ + } while (0) + +# define CRC4(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + hbuf[2] = (unsigned char)((word) >> 16); \ + hbuf[3] = (unsigned char)((word) >> 24); \ + check = crc32(check, hbuf, 4); \ + } while (0) +#endif + +/* Load registers with state in inflate() for speed */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Restore state from registers in inflate() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflate() + if there is no input available. */ +#define PULLBYTE() \ + do { \ + if (have == 0) goto inf_leave; \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflate(). 
*/ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* Reverse the bytes in a 32-bit value */ +#define REVERSE(q) \ + ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ + (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) + +/* + inflate() uses a state machine to process as much input data and generate as + much output data as possible before returning. The state machine is + structured roughly as follows: + + for (;;) switch (state) { + ... + case STATEn: + if (not enough input data or output space to make progress) + return; + ... make progress ... + state = STATEm; + break; + ... + } + + so when inflate() is called again, the same case is attempted again, and + if the appropriate resources are provided, the machine proceeds to the + next state. The NEEDBITS() macro is usually the way the state evaluates + whether it can proceed or should return. NEEDBITS() does the return if + the requested bits are not available. The typical use of the BITS macros + is: + + NEEDBITS(n); + ... do something with BITS(n) ... + DROPBITS(n); + + where NEEDBITS(n) either returns from inflate() if there isn't enough + input left to load n bits into the accumulator, or it continues. BITS(n) + gives the low n bits in the accumulator. When done, DROPBITS(n) drops + the low n bits off the accumulator. INITBITS() clears the accumulator + and sets the number of available bits to zero. BYTEBITS() discards just + enough bits to put the accumulator on a byte boundary. 
After BYTEBITS() + and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. + + NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return + if there is no input available. The decoding of variable length codes uses + PULLBYTE() directly in order to pull just enough bytes to decode the next + code, and no more. + + Some states loop until they get enough input, making sure that enough + state information is maintained to continue the loop where it left off + if NEEDBITS() returns in the loop. For example, want, need, and keep + would all have to actually be part of the saved state in case NEEDBITS() + returns: + + case STATEw: + while (want < need) { + NEEDBITS(n); + keep[want++] = BITS(n); + DROPBITS(n); + } + state = STATEx; + case STATEx: + + As shown above, if the next state is also the next case, then the break + is omitted. + + A state may also return if there is not enough output space available to + complete that state. Those states are copying stored data, writing a + literal byte, and copying a matching string. + + When returning, a "goto inf_leave" is used to update the total counters, + update the check value, and determine whether any progress has been made + during that inflate() call in order to return the proper return code. + Progress is defined as a change in either strm->avail_in or strm->avail_out. + When there is a window, goto inf_leave will update the window with the last + output written. If a goto inf_leave occurs in the middle of decompression + and there is no window currently, goto inf_leave will create one and copy + output to the window for the next call of inflate(). + + In this implementation, the flush parameter of inflate() only affects the + return code (per zlib.h). inflate() always writes as much as possible to + strm->next_out, given the space available and the provided input--the effect + documented in zlib.h of Z_SYNC_FLUSH. 
Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream available. So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream. + */ + +int ZEXPORT inflate(strm, flush) +z_streamp strm; +int flush; +{ + struct inflate_state FAR *state; + unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code this; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ +#ifdef GUNZIP + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ +#endif + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + if (strm == Z_NULL || strm->state == Z_NULL || strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state FAR *)strm->state; + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); +#ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + state->check = crc32(0L, Z_NULL, 0); + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; + 
break; + } + state->flags = 0; /* expect zlib header */ + if (state->head != Z_NULL) + state->head->done = -1; + if (!(state->wrap & 1) || /* check if zlib header allowed */ +#else + if ( +#endif + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; + state->mode = BAD; + break; + } + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + DROPBITS(4); + len = BITS(4) + 8; + if (len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + state->dmax = 1U << len; + Tracev((stderr, "inflate: zlib header ok\n")); + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = hold & 0x200 ? DICTID : TYPE; + INITBITS(); + break; +#ifdef GUNZIP + case FLAGS: + NEEDBITS(16); + state->flags = (int)(hold); + if ((state->flags & 0xff) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + if (state->flags & 0xe000) { + strm->msg = (char *)"unknown header flags set"; + state->mode = BAD; + break; + } + if (state->head != Z_NULL) + state->head->text = (int)((hold >> 8) & 1); + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; + case TIME: + NEEDBITS(32); + if (state->head != Z_NULL) + state->head->time = hold; + if (state->flags & 0x0200) CRC4(state->check, hold); + INITBITS(); + state->mode = OS; + case OS: + NEEDBITS(16); + if (state->head != Z_NULL) { + state->head->xflags = (int)(hold & 0xff); + state->head->os = (int)(hold >> 8); + } + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; + case EXLEN: + if (state->flags & 0x0400) { + NEEDBITS(16); + state->length = (unsigned)(hold); + if (state->head != Z_NULL) + state->head->extra_len = (unsigned)hold; + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + } + else if (state->head != Z_NULL) + state->head->extra = Z_NULL; + state->mode = 
EXTRA; + case EXTRA: + if (state->flags & 0x0400) { + copy = state->length; + if (copy > have) copy = have; + if (copy) { + if (state->head != Z_NULL && + state->head->extra != Z_NULL) { + len = state->head->extra_len - state->length; + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? + state->head->extra_max - len : copy); + } + if (state->flags & 0x0200) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + state->length -= copy; + } + if (state->length) goto inf_leave; + } + state->length = 0; + state->mode = NAME; + case NAME: + if (state->flags & 0x0800) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->name != Z_NULL && + state->length < state->head->name_max) + state->head->name[state->length++] = len; + } while (len && copy < have); + if (state->flags & 0x0200) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->name = Z_NULL; + state->length = 0; + state->mode = COMMENT; + case COMMENT: + if (state->flags & 0x1000) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->comment != Z_NULL && + state->length < state->head->comm_max) + state->head->comment[state->length++] = len; + } while (len && copy < have); + if (state->flags & 0x0200) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->comment = Z_NULL; + state->mode = HCRC; + case HCRC: + if (state->flags & 0x0200) { + NEEDBITS(16); + if (hold != (state->check & 0xffff)) { + strm->msg = (char *)"header crc mismatch"; + state->mode = BAD; + break; + } + INITBITS(); + } + if (state->head != Z_NULL) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + 
state->head->done = 1; + } + strm->adler = state->check = crc32(0L, Z_NULL, 0); + state->mode = TYPE; + break; +#endif + case DICTID: + NEEDBITS(32); + strm->adler = state->check = REVERSE(hold); + INITBITS(); + state->mode = DICT; + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; + } + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = TYPE; + case TYPE: + if (flush == Z_BLOCK) goto inf_leave; + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? 
" (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + state->mode = COPY; + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + state->have = 0; + state->mode = LENLENS; + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + state->have = 0; + state->mode = CODELENS; + case CODELENS: + while (state->have < state->nlen + state->ndist) { + for (;;) { 
+ this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.val < 16) { + NEEDBITS(this.bits); + DROPBITS(this.bits); + state->lens[state->have++] = this.val; + } + else { + if (this.val == 16) { + NEEDBITS(this.bits + 2); + DROPBITS(this.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (this.val == 17) { + NEEDBITS(this.bits + 3); + DROPBITS(this.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(this.bits + 7); + DROPBITS(this.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* build code tables */ + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN; + case LEN: + if (have >= 6 && left >= 258) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + break; + } + for (;;) { + this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.op && (this.op & 0xf0) == 0) { + 
last = this; + for (;;) { + this = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(this.bits); + state->length = (unsigned)this.val; + if ((int)(this.op) == 0) { + Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", this.val)); + state->mode = LIT; + break; + } + if (this.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + if (this.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(this.op) & 15; + state->mode = LENEXT; + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + state->mode = DIST; + case DIST: + for (;;) { + this = state->distcode[BITS(state->distbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if ((this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(this.bits); + if (this.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)this.val; + state->extra = (unsigned)(this.op) & 15; + state->mode = DISTEXT; + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } +#ifdef INFLATE_STRICT + if (state->offset > state->dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + if (state->offset > state->whave + out - left) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + 
Tracevv((stderr, "inflate: distance %u\n", state->offset)); + state->mode = MATCH; + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->write) { + copy -= state->write; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->write - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = *from++; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + case CHECK: + if (state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if (out) + strm->adler = state->check = + UPDATE(state->check, put - out, out); + out = left; + if (( +#ifdef GUNZIP + state->flags ? hold : +#endif + REVERSE(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: check matches trailer\n")); + } +#ifdef GUNZIP + state->mode = LENGTH; + case LENGTH: + if (state->wrap && state->flags) { + NEEDBITS(32); + if (hold != (state->total & 0xffffffffUL)) { + strm->msg = (char *)"incorrect length check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: length matches trailer\n")); + } +#endif + state->mode = DONE; + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. 
Call updatewindow() to create and/or update the window state. + Note: a memory error from inflate() is non-recoverable. + */ + inf_leave: + RESTORE(); + if (state->wsize || (state->mode < CHECK && out != strm->avail_out)) + if (updatewindow(strm, out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if (state->wrap && out) + strm->adler = state->check = + UPDATE(state->check, strm->next_out - out, out); + strm->data_type = state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0); + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + return ret; +} + +int ZEXPORT inflateEnd(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->window != Z_NULL) ZFREE(strm, state->window); + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} + +int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) +z_streamp strm; +const Bytef *dictionary; +uInt dictLength; +{ + struct inflate_state FAR *state; + unsigned long id; + + /* check state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->wrap != 0 && state->mode != DICT) + return Z_STREAM_ERROR; + + /* check for correct dictionary id */ + if (state->mode == DICT) { + id = adler32(0L, Z_NULL, 0); + id = adler32(id, dictionary, dictLength); + if (id != state->check) + return Z_DATA_ERROR; + } + + /* copy dictionary to window */ + if (updatewindow(strm, strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + if (dictLength > state->wsize) { + zmemcpy(state->window, dictionary + dictLength - state->wsize, + state->wsize); + state->whave 
= state->wsize; + } + else { + zmemcpy(state->window + state->wsize - dictLength, dictionary, + dictLength); + state->whave = dictLength; + } + state->havedict = 1; + Tracev((stderr, "inflate: dictionary set\n")); + return Z_OK; +} + +int ZEXPORT inflateGetHeader(strm, head) +z_streamp strm; +gz_headerp head; +{ + struct inflate_state FAR *state; + + /* check state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; + + /* save header structure */ + state->head = head; + head->done = 0; + return Z_OK; +} + +/* + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found + or when out of input. When called, *have is the number of pattern bytes + found in order so far, in 0..3. On return *have is updated to the new + state. If on return *have equals four, then the pattern was found and the + return value is how many bytes were read including the last byte of the + pattern. If *have is less than four, then the pattern has not been found + yet and the return value is len. In the latter case, syncsearch() can be + called again with more data and the *have state. *have is initialized to + zero for the first call. + */ +local unsigned syncsearch(have, buf, len) +unsigned FAR *have; +unsigned char FAR *buf; +unsigned len; +{ + unsigned got; + unsigned next; + + got = *have; + next = 0; + while (next < len && got < 4) { + if ((int)(buf[next]) == (got < 2 ? 
0 : 0xff)) + got++; + else if (buf[next]) + got = 0; + else + got = 4 - got; + next++; + } + *have = got; + return next; +} + +int ZEXPORT inflateSync(strm) +z_streamp strm; +{ + unsigned len; /* number of bytes to look at or looked at */ + unsigned long in, out; /* temporary to save total_in and total_out */ + unsigned char buf[4]; /* to restore bit buffer to byte string */ + struct inflate_state FAR *state; + + /* check parameters */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; + + /* if first time, start search in bit buffer */ + if (state->mode != SYNC) { + state->mode = SYNC; + state->hold <<= state->bits & 7; + state->bits -= state->bits & 7; + len = 0; + while (state->bits >= 8) { + buf[len++] = (unsigned char)(state->hold); + state->hold >>= 8; + state->bits -= 8; + } + state->have = 0; + syncsearch(&(state->have), buf, len); + } + + /* search available input */ + len = syncsearch(&(state->have), strm->next_in, strm->avail_in); + strm->avail_in -= len; + strm->next_in += len; + strm->total_in += len; + + /* return no joy or set up to restart inflate() on a new block */ + if (state->have != 4) return Z_DATA_ERROR; + in = strm->total_in; out = strm->total_out; + inflateReset(strm); + strm->total_in = in; strm->total_out = out; + state->mode = TYPE; + return Z_OK; +} + +/* + Returns true if inflate is currently at the end of a block generated by + Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP + implementation to provide an additional safety check. PPP uses + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored + block. When decompressing, PPP checks that at the end of input packet, + inflate is waiting for these length bytes. 
+ */ +int ZEXPORT inflateSyncPoint(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; +} + +int ZEXPORT inflateCopy(dest, source) +z_streamp dest; +z_streamp source; +{ + struct inflate_state FAR *state; + struct inflate_state FAR *copy; + unsigned char FAR *window; + unsigned wsize; + + /* check input */ + if (dest == Z_NULL || source == Z_NULL || source->state == Z_NULL || + source->zalloc == (alloc_func)0 || source->zfree == (free_func)0) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)source->state; + + /* allocate space */ + copy = (struct inflate_state FAR *) + ZALLOC(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) + ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); + if (window == Z_NULL) { + ZFREE(source, copy); + return Z_MEM_ERROR; + } + } + + /* copy state */ + zmemcpy(dest, source, sizeof(z_stream)); + zmemcpy(copy, state, sizeof(struct inflate_state)); + if (state->lencode >= state->codes && + state->lencode <= state->codes + ENOUGH - 1) { + copy->lencode = copy->codes + (state->lencode - state->codes); + copy->distcode = copy->codes + (state->distcode - state->codes); + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { + wsize = 1U << state->wbits; + zmemcpy(window, state->window, wsize); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; + return Z_OK; +} Added: external/zlib/inflate.h ============================================================================== --- (empty file) +++ external/zlib/inflate.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,115 @@ +/* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2004 Mark Adler + * For conditions of 
distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer decoding by inflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip decoding + should be left enabled. */ +#ifndef NO_GZIP +# define GUNZIP +#endif + +/* Possible inflate modes between inflate() calls */ +typedef enum { + HEAD, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY, /* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN, /* i: waiting for length/lit code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check 
value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ +} inflate_mode; + +/* + State transitions between above modes - + + (most modes can go to the BAD or MEM mode -- not shown for clarity) + + Process header: + HEAD -> (gzip) or (zlib) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME + NAME -> COMMENT -> HCRC -> TYPE + (zlib) -> DICTID or TYPE + DICTID -> DICT -> TYPE + Read deflate blocks: + TYPE -> STORED or TABLE or LEN or CHECK + STORED -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN + Read deflate codes: + LEN -> LENEXT or LIT or TYPE + LENEXT -> DIST -> DISTEXT -> MATCH -> LEN + LIT -> LEN + Process trailer: + CHECK -> LENGTH -> DONE + */ + +/* state maintained between inflate() calls. Approximately 7K bytes. 
*/ +struct inflate_state { + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags (0 if zlib) */ + unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + gz_headerp head; /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned write; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ +}; Added: 
external/zlib/inftrees.c ============================================================================== --- (empty file) +++ external/zlib/inftrees.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,329 @@ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" + +#define MAXBITS 15 + +const char inflate_copyright[] = + " inflate 1.2.3 Copyright 1995-2005 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. 
+ */ +int inflate_table(type, lens, codes, table, bits, work) +codetype type; +unsigned short FAR *lens; +unsigned codes; +code FAR * FAR *table; +unsigned FAR *bits; +unsigned short FAR *work; +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code this; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + int end; /* use base and extra for symbol > end */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 
0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. + + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. 
+ */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + this.op = (unsigned char)64; /* invalid code marker */ + this.bits = (unsigned char)1; + this.val = (unsigned short)0; + *(*table)++ = this; /* make a table to force an error */ + *(*table)++ = this; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ + } + for (min = 1; min <= MAXBITS; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. 
When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked when a LENS table is being made + against the space in *table, ENOUGH, minus the maximum space needed by + the worst case distance code, MAXD. This should never happen, but the + sufficiency of ENOUGH has not been proven exhaustively, hence the check. + This assumes that when type == LENS, bits == 9. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. 
+ */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + end = 19; + break; + case LENS: + base = lbase; + base -= 257; + extra = lext; + extra -= 257; + end = 256; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + end = -1; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if (type == LENS && used >= ENOUGH - MAXD) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + this.bits = (unsigned char)(len - drop); + if ((int)(work[sym]) < end) { + this.op = (unsigned char)0; + this.val = work[sym]; + } + else if ((int)(work[sym]) > end) { + this.op = (unsigned char)(extra[work[sym]]); + this.val = base[work[sym]]; + } + else { + this.op = (unsigned char)(32 + 64); /* end of block */ + this.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = this; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to 
sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if (type == LENS && used >= ENOUGH - MAXD) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* + Fill in rest of table for incomplete codes. This loop is similar to the + loop above in incrementing huff for table indices. It is assumed that + len is equal to curr + drop, so there is no loop needed to increment + through high index bits. When the current sub-table is filled, the loop + drops back to the root table to fill in any remaining entries there. 
+ */ + this.op = (unsigned char)64; /* invalid code marker */ + this.bits = (unsigned char)(len - drop); + this.val = (unsigned short)0; + while (huff != 0) { + /* when done with sub-table, drop back to root table */ + if (drop != 0 && (huff & mask) != low) { + drop = 0; + len = root; + next = *table; + this.bits = (unsigned char)len; + } + + /* put invalid code marker in table */ + next[huff >> drop] = this; + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} Added: external/zlib/inftrees.h ============================================================================== --- (empty file) +++ external/zlib/inftrees.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,55 @@ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low four bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. 
val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 0001eeee - length or distance, eeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of dynamic tree. The maximum found in a long but non- + exhaustive search was 1444 code structures (852 for length/literals + and 592 for distances, the latter actually the result of an + exhaustive search). The true maximum is not known, but the value + below is more than safe. */ +#define ENOUGH 2048 +#define MAXD 592 + +/* Type of code to build for inftable() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +extern int inflate_table OF((codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work)); Added: external/zlib/make_vms.com ============================================================================== --- (empty file) +++ external/zlib/make_vms.com Tue Jan 3 07:42:59 2006 @@ -0,0 +1,461 @@ +$! make libz under VMS written by +$! Martin P.J. Zinser +$! +$! +$ on error then goto err_exit +$! +$! +$! Just some general constants... +$! +$ true = 1 +$ false = 0 +$ tmpnam = "temp_" + f$getjpi("","pid") +$ SAY = "WRITE SYS$OUTPUT" +$! +$! Setup variables holding "config" information +$! +$ Make = "" +$ name = "Zlib" +$ version = "?.?.?" 
+$ v_string = "ZLIB_VERSION" +$ v_file = "zlib.h" +$ ccopt = "" +$ lopts = "" +$ linkonly = false +$ optfile = name + ".opt" +$ its_decc = false +$ its_vaxc = false +$ its_gnuc = false +$ axp = f$getsyi("HW_MODEL").ge.1024 +$ s_case = false +$! Check for MMK/MMS +$! +$ If F$Search ("Sys$System:MMS.EXE") .nes. "" Then Make = "MMS" +$ If F$Type (MMK) .eqs. "STRING" Then Make = "MMK" +$! +$! +$ gosub find_version +$! +$ gosub check_opts +$! +$! Look for the compiler used +$! +$ gosub check_compiler +$ if its_decc +$ then +$ ccopt = "/prefix=all" + ccopt +$ if f$trnlnm("SYS") .eqs. "" +$ then +$ if axp +$ then +$ define sys sys$library: +$ else +$ ccopt = "/decc" + ccopt +$ define sys decc$library_include: +$ endif +$ endif +$ endif +$ if its_vaxc .or. its_gnuc +$ then +$ if f$trnlnm("SYS").eqs."" then define sys sys$library: +$ endif +$! +$! Build the thing plain or with mms +$! +$ write sys$output "Compiling Zlib sources ..." +$ if make.eqs."" +$ then +$ dele example.obj;*,minigzip.obj;* +$ CALL MAKE adler32.OBJ "CC ''CCOPT' adler32" - + adler32.c zlib.h zconf.h +$ CALL MAKE compress.OBJ "CC ''CCOPT' compress" - + compress.c zlib.h zconf.h +$ CALL MAKE crc32.OBJ "CC ''CCOPT' crc32" - + crc32.c zlib.h zconf.h +$ CALL MAKE deflate.OBJ "CC ''CCOPT' deflate" - + deflate.c deflate.h zutil.h zlib.h zconf.h +$ CALL MAKE gzio.OBJ "CC ''CCOPT' gzio" - + gzio.c zutil.h zlib.h zconf.h +$ CALL MAKE infback.OBJ "CC ''CCOPT' infback" - + infback.c zutil.h inftrees.h inflate.h inffast.h inffixed.h +$ CALL MAKE inffast.OBJ "CC ''CCOPT' inffast" - + inffast.c zutil.h zlib.h zconf.h inffast.h +$ CALL MAKE inflate.OBJ "CC ''CCOPT' inflate" - + inflate.c zutil.h zlib.h zconf.h infblock.h +$ CALL MAKE inftrees.OBJ "CC ''CCOPT' inftrees" - + inftrees.c zutil.h zlib.h zconf.h inftrees.h +$ CALL MAKE trees.OBJ "CC ''CCOPT' trees" - + trees.c deflate.h zutil.h zlib.h zconf.h +$ CALL MAKE uncompr.OBJ "CC ''CCOPT' uncompr" - + uncompr.c zlib.h zconf.h +$ CALL MAKE zutil.OBJ "CC ''CCOPT' zutil" 
- + zutil.c zutil.h zlib.h zconf.h +$ write sys$output "Building Zlib ..." +$ CALL MAKE libz.OLB "lib/crea libz.olb *.obj" *.OBJ +$ write sys$output "Building example..." +$ CALL MAKE example.OBJ "CC ''CCOPT' example" - + example.c zlib.h zconf.h +$ call make example.exe "LINK example,libz.olb/lib" example.obj libz.olb +$ if f$search("x11vms:xvmsutils.olb") .nes. "" +$ then +$ write sys$output "Building minigzip..." +$ CALL MAKE minigzip.OBJ "CC ''CCOPT' minigzip" - + minigzip.c zlib.h zconf.h +$ call make minigzip.exe - + "LINK minigzip,libz.olb/lib,x11vms:xvmsutils.olb/lib" - + minigzip.obj libz.olb +$ endif +$ else +$ gosub crea_mms +$ SAY "Make ''name' ''version' with ''Make' " +$ 'make' +$ endif +$! +$! Alpha gets a shareable image +$! +$ If axp +$ Then +$ gosub crea_olist +$ write sys$output "Creating libzshr.exe" +$ call anal_obj_axp modules.opt _link.opt +$ if s_case +$ then +$ open/append optf modules.opt +$ write optf "case_sensitive=YES" +$ close optf +$ endif +$ LINK_'lopts'/SHARE=libzshr.exe modules.opt/opt,_link.opt/opt +$ endif +$ write sys$output "Zlib build completed" +$ exit +$CC_ERR: +$ write sys$output "C compiler required to build ''name'" +$ goto err_exit +$ERR_EXIT: +$ set message/facil/ident/sever/text +$ write sys$output "Exiting..." +$ exit 2 +$! +$! +$MAKE: SUBROUTINE !SUBROUTINE TO CHECK DEPENDENCIES +$ V = 'F$Verify(0) +$! P1 = What we are trying to make +$! P2 = Command to make it +$! P3 - P8 What it depends on +$ +$ If F$Search(P1) .Eqs. "" Then Goto Makeit +$ Time = F$CvTime(F$File(P1,"RDT")) +$arg=3 +$Loop: +$ Argument = P'arg +$ If Argument .Eqs. "" Then Goto Exit +$ El=0 +$Loop2: +$ File = F$Element(El," ",Argument) +$ If File .Eqs. " " Then Goto Endl +$ AFile = "" +$Loop3: +$ OFile = AFile +$ AFile = F$Search(File) +$ If AFile .Eqs. "" .Or. AFile .Eqs. OFile Then Goto NextEl +$ If F$CvTime(F$File(AFile,"RDT")) .Ges. Time Then Goto Makeit +$ Goto Loop3 +$NextEL: +$ El = El + 1 +$ Goto Loop2 +$EndL: +$ arg=arg+1 +$ If arg .Le. 
8 Then Goto Loop +$ Goto Exit +$ +$Makeit: +$ VV=F$VERIFY(0) +$ write sys$output P2 +$ 'P2 +$ VV='F$Verify(VV) +$Exit: +$ If V Then Set Verify +$ENDSUBROUTINE +$!------------------------------------------------------------------------------ +$! +$! Check command line options and set symbols accordingly +$! +$ CHECK_OPTS: +$ i = 1 +$ OPT_LOOP: +$ if i .lt. 9 +$ then +$ cparm = f$edit(p'i',"upcase") +$ if cparm .eqs. "DEBUG" +$ then +$ ccopt = ccopt + "/noopt/deb" +$ lopts = lopts + "/deb" +$ endif +$ if f$locate("CCOPT=",cparm) .lt. f$length(cparm) +$ then +$ start = f$locate("=",cparm) + 1 +$ len = f$length(cparm) - start +$ ccopt = ccopt + f$extract(start,len,cparm) +$ if f$locate("AS_IS",f$edit(ccopt,"UPCASE")) .lt. f$length(ccopt) - + then s_case = true +$ endif +$ if cparm .eqs. "LINK" then linkonly = true +$ if f$locate("LOPTS=",cparm) .lt. f$length(cparm) +$ then +$ start = f$locate("=",cparm) + 1 +$ len = f$length(cparm) - start +$ lopts = lopts + f$extract(start,len,cparm) +$ endif +$ if f$locate("CC=",cparm) .lt. f$length(cparm) +$ then +$ start = f$locate("=",cparm) + 1 +$ len = f$length(cparm) - start +$ cc_com = f$extract(start,len,cparm) + if (cc_com .nes. "DECC") .and. - + (cc_com .nes. "VAXC") .and. - + (cc_com .nes. "GNUC") +$ then +$ write sys$output "Unsupported compiler choice ''cc_com' ignored" +$ write sys$output "Use DECC, VAXC, or GNUC instead" +$ else +$ if cc_com .eqs. "DECC" then its_decc = true +$ if cc_com .eqs. "VAXC" then its_vaxc = true +$ if cc_com .eqs. "GNUC" then its_gnuc = true +$ endif +$ endif +$ if f$locate("MAKE=",cparm) .lt. f$length(cparm) +$ then +$ start = f$locate("=",cparm) + 1 +$ len = f$length(cparm) - start +$ mmks = f$extract(start,len,cparm) +$ if (mmks .eqs. "MMK") .or. (mmks .eqs. 
"MMS") +$ then +$ make = mmks +$ else +$ write sys$output "Unsupported make choice ''mmks' ignored" +$ write sys$output "Use MMK or MMS instead" +$ endif +$ endif +$ i = i + 1 +$ goto opt_loop +$ endif +$ return +$!------------------------------------------------------------------------------ +$! +$! Look for the compiler used +$! +$CHECK_COMPILER: +$ if (.not. (its_decc .or. its_vaxc .or. its_gnuc)) +$ then +$ its_decc = (f$search("SYS$SYSTEM:DECC$COMPILER.EXE") .nes. "") +$ its_vaxc = .not. its_decc .and. (F$Search("SYS$System:VAXC.Exe") .nes. "") +$ its_gnuc = .not. (its_decc .or. its_vaxc) .and. (f$trnlnm("gnu_cc") .nes. "") +$ endif +$! +$! Exit if no compiler available +$! +$ if (.not. (its_decc .or. its_vaxc .or. its_gnuc)) +$ then goto CC_ERR +$ else +$ if its_decc then write sys$output "CC compiler check ... Compaq C" +$ if its_vaxc then write sys$output "CC compiler check ... VAX C" +$ if its_gnuc then write sys$output "CC compiler check ... GNU C" +$ endif +$ return +$!------------------------------------------------------------------------------ +$! +$! If MMS/MMK are available dump out the descrip.mms if required +$! +$CREA_MMS: +$ write sys$output "Creating descrip.mms..." +$ create descrip.mms +$ open/append out descrip.mms +$ copy sys$input: out +$ deck +# descrip.mms: MMS description file for building zlib on VMS +# written by Martin P.J. 
Zinser +# + +OBJS = adler32.obj, compress.obj, crc32.obj, gzio.obj, uncompr.obj, infback.obj\ + deflate.obj, trees.obj, zutil.obj, inflate.obj, \ + inftrees.obj, inffast.obj + +$ eod +$ write out "CFLAGS=", ccopt +$ write out "LOPTS=", lopts +$ copy sys$input: out +$ deck + +all : example.exe minigzip.exe libz.olb + @ write sys$output " Example applications available" + +libz.olb : libz.olb($(OBJS)) + @ write sys$output " libz available" + +example.exe : example.obj libz.olb + link $(LOPTS) example,libz.olb/lib + +minigzip.exe : minigzip.obj libz.olb + link $(LOPTS) minigzip,libz.olb/lib,x11vms:xvmsutils.olb/lib + +clean : + delete *.obj;*,libz.olb;*,*.opt;*,*.exe;* + + +# Other dependencies. +adler32.obj : adler32.c zutil.h zlib.h zconf.h +compress.obj : compress.c zlib.h zconf.h +crc32.obj : crc32.c zutil.h zlib.h zconf.h +deflate.obj : deflate.c deflate.h zutil.h zlib.h zconf.h +example.obj : example.c zlib.h zconf.h +gzio.obj : gzio.c zutil.h zlib.h zconf.h +inffast.obj : inffast.c zutil.h zlib.h zconf.h inftrees.h inffast.h +inflate.obj : inflate.c zutil.h zlib.h zconf.h +inftrees.obj : inftrees.c zutil.h zlib.h zconf.h inftrees.h +minigzip.obj : minigzip.c zlib.h zconf.h +trees.obj : trees.c deflate.h zutil.h zlib.h zconf.h +uncompr.obj : uncompr.c zlib.h zconf.h +zutil.obj : zutil.c zutil.h zlib.h zconf.h +infback.obj : infback.c zutil.h inftrees.h inflate.h inffast.h inffixed.h +$ eod +$ close out +$ return +$!------------------------------------------------------------------------------ +$! +$! Read list of core library sources from makefile.in and create options +$! needed to build shareable image +$! +$CREA_OLIST: +$ open/read min makefile.in +$ open/write mod modules.opt +$ src_check = "OBJS =" +$MRLOOP: +$ read/end=mrdone min rec +$ if (f$extract(0,6,rec) .nes. src_check) then goto mrloop +$ rec = rec - src_check +$ gosub extra_filnam +$ if (f$element(1,"\",rec) .eqs. 
"\") then goto mrdone +$MRSLOOP: +$ read/end=mrdone min rec +$ gosub extra_filnam +$ if (f$element(1,"\",rec) .nes. "\") then goto mrsloop +$MRDONE: +$ close min +$ close mod +$ return +$!------------------------------------------------------------------------------ +$! +$! Take record extracted in crea_olist and split it into single filenames +$! +$EXTRA_FILNAM: +$ myrec = f$edit(rec - "\", "trim,compress") +$ i = 0 +$FELOOP: +$ srcfil = f$element(i," ", myrec) +$ if (srcfil .nes. " ") +$ then +$ write mod f$parse(srcfil,,,"NAME"), ".obj" +$ i = i + 1 +$ goto feloop +$ endif +$ return +$!------------------------------------------------------------------------------ +$! +$! Find current Zlib version number +$! +$FIND_VERSION: +$ open/read h_in 'v_file' +$hloop: +$ read/end=hdone h_in rec +$ rec = f$edit(rec,"TRIM") +$ if (f$extract(0,1,rec) .nes. "#") then goto hloop +$ rec = f$edit(rec - "#", "TRIM") +$ if f$element(0," ",rec) .nes. "define" then goto hloop +$ if f$element(1," ",rec) .eqs. v_string +$ then +$ version = 'f$element(2," ",rec)' +$ goto hdone +$ endif +$ goto hloop +$hdone: +$ close h_in +$ return +$!------------------------------------------------------------------------------ +$! +$! Analyze Object files for OpenVMS AXP to extract Procedure and Data +$! information to build a symbol vector for a shareable image +$! All the "brains" of this logic was suggested by Hartmut Becker +$! (Hartmut.Becker at compaq.com). All the bugs were introduced by me +$! (zinser at decus.de), so if you do have problem reports please do not +$! bother Hartmut/HP, but get in touch with me +$! +$ ANAL_OBJ_AXP: Subroutine +$ V = 'F$Verify(0) +$ SAY := "WRITE_ SYS$OUTPUT" +$ +$ IF F$SEARCH("''P1'") .EQS. "" +$ THEN +$ SAY "ANAL_OBJ_AXP-E-NOSUCHFILE: Error, inputfile ''p1' not available" +$ goto exit_aa +$ ENDIF +$ IF "''P2'" .EQS. 
"" +$ THEN +$ SAY "ANAL_OBJ_AXP: Error, no output file provided" +$ goto exit_aa +$ ENDIF +$ +$ open/read in 'p1 +$ create a.tmp +$ open/append atmp a.tmp +$ loop: +$ read/end=end_loop in line +$ f= f$search(line) +$ if f .eqs. "" +$ then +$ write sys$output "ANAL_OBJ_AXP-w-nosuchfile, ''line'" +$ goto loop +$ endif +$ define/user sys$output nl: +$ define/user sys$error nl: +$ anal/obj/gsd 'f /out=x.tmp +$ open/read xtmp x.tmp +$ XLOOP: +$ read/end=end_xloop xtmp xline +$ xline = f$edit(xline,"compress") +$ write atmp xline +$ goto xloop +$ END_XLOOP: +$ close xtmp +$ goto loop +$ end_loop: +$ close in +$ close atmp +$ if f$search("a.tmp") .eqs. "" - + then $ exit +$ ! all global definitions +$ search a.tmp "symbol:","EGSY$V_DEF 1","EGSY$V_NORM 1"/out=b.tmp +$ ! all procedures +$ search b.tmp "EGSY$V_NORM 1"/wind=(0,1) /out=c.tmp +$ search c.tmp "symbol:"/out=d.tmp +$ define/user sys$output nl: +$ edito/edt/command=sys$input d.tmp +sub/symbol: "/symbol_vector=(/whole +sub/"/=PROCEDURE)/whole +exit +$ ! all data +$ search b.tmp "EGSY$V_DEF 1"/wind=(0,1) /out=e.tmp +$ search e.tmp "symbol:"/out=f.tmp +$ define/user sys$output nl: +$ edito/edt/command=sys$input f.tmp +sub/symbol: "/symbol_vector=(/whole +sub/"/=DATA)/whole +exit +$ sort/nodupl d.tmp,f.tmp 'p2' +$ delete a.tmp;*,b.tmp;*,c.tmp;*,d.tmp;*,e.tmp;*,f.tmp;* +$ if f$search("x.tmp") .nes. "" - + then $ delete x.tmp;* +$! +$ EXIT_AA: +$ if V then set verify +$ endsubroutine +$!------------------------------------------------------------------------------ Added: external/zlib/minigzip.c ============================================================================== --- (empty file) +++ external/zlib/minigzip.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,322 @@ +/* minigzip.c -- simulate gzip using the zlib compression library + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * minigzip is a minimal implementation of the gzip utility. 
This is + * only an example of using zlib and isn't meant to replace the + * full-featured gzip. No attempt is made to deal with file systems + * limiting names to 14 or 8+3 characters, etc... Error checking is + * very limited. So use minigzip only for testing; use gzip for the + * real thing. On MSDOS, use only on file names without extension + * or in pipe mode. + */ + +/* @(#) $Id$ */ + +#include +#include "zlib.h" + +#ifdef STDC +# include +# include +#endif + +#ifdef USE_MMAP +# include +# include +# include +#endif + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +# include +# include +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif + +#ifdef VMS +# define unlink delete +# define GZ_SUFFIX "-gz" +#endif +#ifdef RISCOS +# define unlink remove +# define GZ_SUFFIX "-gz" +# define fileno(file) file->__file +#endif +#if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include /* for fileno */ +#endif + +#ifndef WIN32 /* unlink already in stdio.h for WIN32 */ + extern int unlink OF((const char *)); +#endif + +#ifndef GZ_SUFFIX +# define GZ_SUFFIX ".gz" +#endif +#define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1) + +#define BUFLEN 16384 +#define MAX_NAME_LEN 1024 + +#ifdef MAXSEG_64K +# define local static + /* Needed for systems with limitation on stack size. 
*/ +#else +# define local +#endif + +char *prog; + +void error OF((const char *msg)); +void gz_compress OF((FILE *in, gzFile out)); +#ifdef USE_MMAP +int gz_compress_mmap OF((FILE *in, gzFile out)); +#endif +void gz_uncompress OF((gzFile in, FILE *out)); +void file_compress OF((char *file, char *mode)); +void file_uncompress OF((char *file)); +int main OF((int argc, char *argv[])); + +/* =========================================================================== + * Display error message and exit + */ +void error(msg) + const char *msg; +{ + fprintf(stderr, "%s: %s\n", prog, msg); + exit(1); +} + +/* =========================================================================== + * Compress input to output then close both files. + */ + +void gz_compress(in, out) + FILE *in; + gzFile out; +{ + local char buf[BUFLEN]; + int len; + int err; + +#ifdef USE_MMAP + /* Try first compressing with mmap. If mmap fails (minigzip used in a + * pipe), use the normal fread loop. + */ + if (gz_compress_mmap(in, out) == Z_OK) return; +#endif + for (;;) { + len = (int)fread(buf, 1, sizeof(buf), in); + if (ferror(in)) { + perror("fread"); + exit(1); + } + if (len == 0) break; + + if (gzwrite(out, buf, (unsigned)len) != len) error(gzerror(out, &err)); + } + fclose(in); + if (gzclose(out) != Z_OK) error("failed gzclose"); +} + +#ifdef USE_MMAP /* MMAP version, Miguel Albrecht */ + +/* Try compressing the input file at once using mmap. Return Z_OK if + * if success, Z_ERRNO otherwise. 
+ */ +int gz_compress_mmap(in, out) + FILE *in; + gzFile out; +{ + int len; + int err; + int ifd = fileno(in); + caddr_t buf; /* mmap'ed buffer for the entire input file */ + off_t buf_len; /* length of the input file */ + struct stat sb; + + /* Determine the size of the file, needed for mmap: */ + if (fstat(ifd, &sb) < 0) return Z_ERRNO; + buf_len = sb.st_size; + if (buf_len <= 0) return Z_ERRNO; + + /* Now do the actual mmap: */ + buf = mmap((caddr_t) 0, buf_len, PROT_READ, MAP_SHARED, ifd, (off_t)0); + if (buf == (caddr_t)(-1)) return Z_ERRNO; + + /* Compress the whole file at once: */ + len = gzwrite(out, (char *)buf, (unsigned)buf_len); + + if (len != (int)buf_len) error(gzerror(out, &err)); + + munmap(buf, buf_len); + fclose(in); + if (gzclose(out) != Z_OK) error("failed gzclose"); + return Z_OK; +} +#endif /* USE_MMAP */ + +/* =========================================================================== + * Uncompress input to output then close both files. + */ +void gz_uncompress(in, out) + gzFile in; + FILE *out; +{ + local char buf[BUFLEN]; + int len; + int err; + + for (;;) { + len = gzread(in, buf, sizeof(buf)); + if (len < 0) error (gzerror(in, &err)); + if (len == 0) break; + + if ((int)fwrite(buf, 1, (unsigned)len, out) != len) { + error("failed fwrite"); + } + } + if (fclose(out)) error("failed fclose"); + + if (gzclose(in) != Z_OK) error("failed gzclose"); +} + + +/* =========================================================================== + * Compress the given file: create a corresponding .gz file and remove the + * original. 
+ */ +void file_compress(file, mode) + char *file; + char *mode; +{ + local char outfile[MAX_NAME_LEN]; + FILE *in; + gzFile out; + + strcpy(outfile, file); + strcat(outfile, GZ_SUFFIX); + + in = fopen(file, "rb"); + if (in == NULL) { + perror(file); + exit(1); + } + out = gzopen(outfile, mode); + if (out == NULL) { + fprintf(stderr, "%s: can't gzopen %s\n", prog, outfile); + exit(1); + } + gz_compress(in, out); + + unlink(file); +} + + +/* =========================================================================== + * Uncompress the given file and remove the original. + */ +void file_uncompress(file) + char *file; +{ + local char buf[MAX_NAME_LEN]; + char *infile, *outfile; + FILE *out; + gzFile in; + uInt len = (uInt)strlen(file); + + strcpy(buf, file); + + if (len > SUFFIX_LEN && strcmp(file+len-SUFFIX_LEN, GZ_SUFFIX) == 0) { + infile = file; + outfile = buf; + outfile[len-3] = '\0'; + } else { + outfile = file; + infile = buf; + strcat(infile, GZ_SUFFIX); + } + in = gzopen(infile, "rb"); + if (in == NULL) { + fprintf(stderr, "%s: can't gzopen %s\n", prog, infile); + exit(1); + } + out = fopen(outfile, "wb"); + if (out == NULL) { + perror(file); + exit(1); + } + + gz_uncompress(in, out); + + unlink(infile); +} + + +/* =========================================================================== + * Usage: minigzip [-d] [-f] [-h] [-r] [-1 to -9] [files...] 
+ * -d : decompress + * -f : compress with Z_FILTERED + * -h : compress with Z_HUFFMAN_ONLY + * -r : compress with Z_RLE + * -1 to -9 : compression level + */ + +int main(argc, argv) + int argc; + char *argv[]; +{ + int uncompr = 0; + gzFile file; + char outmode[20]; + + strcpy(outmode, "wb6 "); + + prog = argv[0]; + argc--, argv++; + + while (argc > 0) { + if (strcmp(*argv, "-d") == 0) + uncompr = 1; + else if (strcmp(*argv, "-f") == 0) + outmode[3] = 'f'; + else if (strcmp(*argv, "-h") == 0) + outmode[3] = 'h'; + else if (strcmp(*argv, "-r") == 0) + outmode[3] = 'R'; + else if ((*argv)[0] == '-' && (*argv)[1] >= '1' && (*argv)[1] <= '9' && + (*argv)[2] == 0) + outmode[2] = (*argv)[1]; + else + break; + argc--, argv++; + } + if (outmode[3] == ' ') + outmode[3] = 0; + if (argc == 0) { + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + if (uncompr) { + file = gzdopen(fileno(stdin), "rb"); + if (file == NULL) error("can't gzdopen stdin"); + gz_uncompress(file, stdout); + } else { + file = gzdopen(fileno(stdout), outmode); + if (file == NULL) error("can't gzdopen stdout"); + gz_compress(stdin, file); + } + } else { + do { + if (uncompr) { + file_uncompress(*argv); + } else { + file_compress(*argv, outmode); + } + } while (argv++, --argc); + } + return 0; +} Added: external/zlib/msdos/Makefile.bor ============================================================================== --- (empty file) +++ external/zlib/msdos/Makefile.bor Tue Jan 3 07:42:59 2006 @@ -0,0 +1,109 @@ +# Makefile for zlib +# Borland C++ +# Last updated: 15-Mar-2003 + +# To use, do "make -fmakefile.bor" +# To compile in small model, set below: MODEL=s + +# WARNING: the small model is supported but only for small values of +# MAX_WBITS and MAX_MEM_LEVEL. 
For example: +# -DMAX_WBITS=11 -DDEF_WBITS=11 -DMAX_MEM_LEVEL=3 +# If you wish to reduce the memory requirements (default 256K for big +# objects plus a few K), you can add to the LOC macro below: +# -DMAX_MEM_LEVEL=7 -DMAX_WBITS=14 +# See zconf.h for details about the memory requirements. + +# ------------ Turbo C++, Borland C++ ------------ + +# Optional nonstandard preprocessor flags (e.g. -DMAX_MEM_LEVEL=7) +# should be added to the environment via "set LOCAL_ZLIB=-DFOO" or added +# to the declaration of LOC here: +LOC = $(LOCAL_ZLIB) + +# type for CPU required: 0: 8086, 1: 80186, 2: 80286, 3: 80386, etc. +CPU_TYP = 0 + +# memory model: one of s, m, c, l (small, medium, compact, large) +MODEL=l + +# replace bcc with tcc for Turbo C++ 1.0, with bcc32 for the 32 bit version +CC=bcc +LD=bcc +AR=tlib + +# compiler flags +# replace "-O2" by "-O -G -a -d" for Turbo C++ 1.0 +CFLAGS=-O2 -Z -m$(MODEL) $(LOC) + +LDFLAGS=-m$(MODEL) -f- + + +# variables +ZLIB_LIB = zlib_$(MODEL).lib + +OBJ1 = adler32.obj compress.obj crc32.obj deflate.obj gzio.obj infback.obj +OBJ2 = inffast.obj inflate.obj inftrees.obj trees.obj uncompr.obj zutil.obj +OBJP1 = +adler32.obj+compress.obj+crc32.obj+deflate.obj+gzio.obj+infback.obj +OBJP2 = +inffast.obj+inflate.obj+inftrees.obj+trees.obj+uncompr.obj+zutil.obj + + +# targets +all: $(ZLIB_LIB) example.exe minigzip.exe + +.c.obj: + $(CC) -c $(CFLAGS) $*.c + +adler32.obj: adler32.c zlib.h zconf.h + +compress.obj: compress.c zlib.h zconf.h + +crc32.obj: crc32.c zlib.h zconf.h crc32.h + +deflate.obj: deflate.c deflate.h zutil.h zlib.h zconf.h + +gzio.obj: gzio.c zutil.h zlib.h zconf.h + +infback.obj: infback.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inffast.obj: inffast.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h + +inflate.obj: inflate.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inftrees.obj: inftrees.c zutil.h zlib.h zconf.h inftrees.h + +trees.obj: trees.c zutil.h 
zlib.h zconf.h deflate.h trees.h + +uncompr.obj: uncompr.c zlib.h zconf.h + +zutil.obj: zutil.c zutil.h zlib.h zconf.h + +example.obj: example.c zlib.h zconf.h + +minigzip.obj: minigzip.c zlib.h zconf.h + + +# the command line is cut to fit in the MS-DOS 128 byte limit: +$(ZLIB_LIB): $(OBJ1) $(OBJ2) + -del $(ZLIB_LIB) + $(AR) $(ZLIB_LIB) $(OBJP1) + $(AR) $(ZLIB_LIB) $(OBJP2) + +example.exe: example.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) example.obj $(ZLIB_LIB) + +minigzip.exe: minigzip.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) minigzip.obj $(ZLIB_LIB) + +test: example.exe minigzip.exe + example + echo hello world | minigzip | minigzip -d + +clean: + -del *.obj + -del *.lib + -del *.exe + -del zlib_*.bak + -del foo.gz Added: external/zlib/msdos/Makefile.dj2 ============================================================================== --- (empty file) +++ external/zlib/msdos/Makefile.dj2 Tue Jan 3 07:42:59 2006 @@ -0,0 +1,104 @@ +# Makefile for zlib. Modified for djgpp v2.0 by F. J. Donahoe, 3/15/96. +# Copyright (C) 1995-1998 Jean-loup Gailly. +# For conditions of distribution and use, see copyright notice in zlib.h + +# To compile, or to compile and test, type: +# +# make -fmakefile.dj2; make test -fmakefile.dj2 +# +# To install libz.a, zconf.h and zlib.h in the djgpp directories, type: +# +# make install -fmakefile.dj2 +# +# after first defining LIBRARY_PATH and INCLUDE_PATH in djgpp.env as +# in the sample below if the pattern of the DJGPP distribution is to +# be followed. Remember that, while 'es around <=> are ignored in +# makefiles, they are *not* in batch files or in djgpp.env. 
+# - - - - - +# [make] +# INCLUDE_PATH=%\>;INCLUDE_PATH%%\DJDIR%\include +# LIBRARY_PATH=%\>;LIBRARY_PATH%%\DJDIR%\lib +# BUTT=-m486 +# - - - - - +# Alternately, these variables may be defined below, overriding the values +# in djgpp.env, as +# INCLUDE_PATH=c:\usr\include +# LIBRARY_PATH=c:\usr\lib + +CC=gcc + +#CFLAGS=-MMD -O +#CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7 +#CFLAGS=-MMD -g -DDEBUG +CFLAGS=-MMD -O3 $(BUTT) -Wall -Wwrite-strings -Wpointer-arith -Wconversion \ + -Wstrict-prototypes -Wmissing-prototypes + +# If cp.exe is available, replace "copy /Y" with "cp -fp" . +CP=copy /Y +# If gnu install.exe is available, replace $(CP) with ginstall. +INSTALL=$(CP) +# The default value of RM is "rm -f." If "rm.exe" is found, comment out: +RM=del +LDLIBS=-L. -lz +LD=$(CC) -s -o +LDSHARED=$(CC) + +INCL=zlib.h zconf.h +LIBS=libz.a + +AR=ar rcs + +prefix=/usr/local +exec_prefix = $(prefix) + +OBJS = adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \ + zutil.o inflate.o infback.o inftrees.o inffast.o + +OBJA = +# to use the asm code: make OBJA=match.o + +TEST_OBJS = example.o minigzip.o + +all: example.exe minigzip.exe + +check: test +test: all + ./example + echo hello world | .\minigzip | .\minigzip -d + +%.o : %.c + $(CC) $(CFLAGS) -c $< -o $@ + +libz.a: $(OBJS) $(OBJA) + $(AR) $@ $(OBJS) $(OBJA) + +%.exe : %.o $(LIBS) + $(LD) $@ $< $(LDLIBS) + +# INCLUDE_PATH and LIBRARY_PATH were set for [make] in djgpp.env . 
+ +.PHONY : uninstall clean + +install: $(INCL) $(LIBS) + -@if not exist $(INCLUDE_PATH)\nul mkdir $(INCLUDE_PATH) + -@if not exist $(LIBRARY_PATH)\nul mkdir $(LIBRARY_PATH) + $(INSTALL) zlib.h $(INCLUDE_PATH) + $(INSTALL) zconf.h $(INCLUDE_PATH) + $(INSTALL) libz.a $(LIBRARY_PATH) + +uninstall: + $(RM) $(INCLUDE_PATH)\zlib.h + $(RM) $(INCLUDE_PATH)\zconf.h + $(RM) $(LIBRARY_PATH)\libz.a + +clean: + $(RM) *.d + $(RM) *.o + $(RM) *.exe + $(RM) libz.a + $(RM) foo.gz + +DEPS := $(wildcard *.d) +ifneq ($(DEPS),) +include $(DEPS) +endif Added: external/zlib/msdos/Makefile.emx ============================================================================== --- (empty file) +++ external/zlib/msdos/Makefile.emx Tue Jan 3 07:42:59 2006 @@ -0,0 +1,69 @@ +# Makefile for zlib. Modified for emx 0.9c by Chr. Spieler, 6/17/98. +# Copyright (C) 1995-1998 Jean-loup Gailly. +# For conditions of distribution and use, see copyright notice in zlib.h + +# To compile, or to compile and test, type: +# +# make -fmakefile.emx; make test -fmakefile.emx +# + +CC=gcc + +#CFLAGS=-MMD -O +#CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7 +#CFLAGS=-MMD -g -DDEBUG +CFLAGS=-MMD -O3 $(BUTT) -Wall -Wwrite-strings -Wpointer-arith -Wconversion \ + -Wstrict-prototypes -Wmissing-prototypes + +# If cp.exe is available, replace "copy /Y" with "cp -fp" . +CP=copy /Y +# If gnu install.exe is available, replace $(CP) with ginstall. +INSTALL=$(CP) +# The default value of RM is "rm -f." If "rm.exe" is found, comment out: +RM=del +LDLIBS=-L.
-lzlib +LD=$(CC) -s -o +LDSHARED=$(CC) + +INCL=zlib.h zconf.h +LIBS=zlib.a + +AR=ar rcs + +prefix=/usr/local +exec_prefix = $(prefix) + +OBJS = adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \ + zutil.o inflate.o infback.o inftrees.o inffast.o + +TEST_OBJS = example.o minigzip.o + +all: example.exe minigzip.exe + +test: all + ./example + echo hello world | .\minigzip | .\minigzip -d + +%.o : %.c + $(CC) $(CFLAGS) -c $< -o $@ + +zlib.a: $(OBJS) + $(AR) $@ $(OBJS) + +%.exe : %.o $(LIBS) + $(LD) $@ $< $(LDLIBS) + + +.PHONY : clean + +clean: + $(RM) *.d + $(RM) *.o + $(RM) *.exe + $(RM) zlib.a + $(RM) foo.gz + +DEPS := $(wildcard *.d) +ifneq ($(DEPS),) +include $(DEPS) +endif Added: external/zlib/msdos/Makefile.msc ============================================================================== --- (empty file) +++ external/zlib/msdos/Makefile.msc Tue Jan 3 07:42:59 2006 @@ -0,0 +1,106 @@ +# Makefile for zlib +# Microsoft C 5.1 or later +# Last updated: 19-Mar-2003 + +# To use, do "make makefile.msc" +# To compile in small model, set below: MODEL=S + +# If you wish to reduce the memory requirements (default 256K for big +# objects plus a few K), you can add to the LOC macro below: +# -DMAX_MEM_LEVEL=7 -DMAX_WBITS=14 +# See zconf.h for details about the memory requirements. + +# ------------- Microsoft C 5.1 and later ------------- + +# Optional nonstandard preprocessor flags (e.g. -DMAX_MEM_LEVEL=7) +# should be added to the environment via "set LOCAL_ZLIB=-DFOO" or added +# to the declaration of LOC here: +LOC = $(LOCAL_ZLIB) + +# Type for CPU required: 0: 8086, 1: 80186, 2: 80286, 3: 80386, etc. 
+CPU_TYP = 0 + +# Memory model: one of S, M, C, L (small, medium, compact, large) +MODEL=L + +CC=cl +CFLAGS=-nologo -A$(MODEL) -G$(CPU_TYP) -W3 -Oait -Gs $(LOC) +#-Ox generates bad code with MSC 5.1 +LIB_CFLAGS=-Zl $(CFLAGS) + +LD=link +LDFLAGS=/noi/e/st:0x1500/noe/farcall/packcode +# "/farcall/packcode" are only useful for `large code' memory models +# but should be a "no-op" for small code models. + + +# variables +ZLIB_LIB = zlib_$(MODEL).lib + +OBJ1 = adler32.obj compress.obj crc32.obj deflate.obj gzio.obj infback.obj +OBJ2 = inffast.obj inflate.obj inftrees.obj trees.obj uncompr.obj zutil.obj + + +# targets +all: $(ZLIB_LIB) example.exe minigzip.exe + +.c.obj: + $(CC) -c $(LIB_CFLAGS) $*.c + +adler32.obj: adler32.c zlib.h zconf.h + +compress.obj: compress.c zlib.h zconf.h + +crc32.obj: crc32.c zlib.h zconf.h crc32.h + +deflate.obj: deflate.c deflate.h zutil.h zlib.h zconf.h + +gzio.obj: gzio.c zutil.h zlib.h zconf.h + +infback.obj: infback.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inffast.obj: inffast.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h + +inflate.obj: inflate.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inftrees.obj: inftrees.c zutil.h zlib.h zconf.h inftrees.h + +trees.obj: trees.c zutil.h zlib.h zconf.h deflate.h trees.h + +uncompr.obj: uncompr.c zlib.h zconf.h + +zutil.obj: zutil.c zutil.h zlib.h zconf.h + +example.obj: example.c zlib.h zconf.h + $(CC) -c $(CFLAGS) $*.c + +minigzip.obj: minigzip.c zlib.h zconf.h + $(CC) -c $(CFLAGS) $*.c + + +# the command line is cut to fit in the MS-DOS 128 byte limit: +$(ZLIB_LIB): $(OBJ1) $(OBJ2) + if exist $(ZLIB_LIB) del $(ZLIB_LIB) + lib $(ZLIB_LIB) $(OBJ1); + lib $(ZLIB_LIB) $(OBJ2); + +example.exe: example.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) example.obj,,,$(ZLIB_LIB); + +minigzip.exe: minigzip.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) minigzip.obj,,,$(ZLIB_LIB); + +test: example.exe minigzip.exe + example + echo hello world | minigzip | 
minigzip -d + +clean: + -del *.obj + -del *.lib + -del *.exe + -del *.map + -del zlib_*.bak + -del foo.gz Added: external/zlib/msdos/Makefile.tc ============================================================================== --- (empty file) +++ external/zlib/msdos/Makefile.tc Tue Jan 3 07:42:59 2006 @@ -0,0 +1,94 @@ +# Makefile for zlib +# Turbo C 2.01, Turbo C++ 1.01 +# Last updated: 15-Mar-2003 + +# To use, do "make -fmakefile.tc" +# To compile in small model, set below: MODEL=s + +# WARNING: the small model is supported but only for small values of +# MAX_WBITS and MAX_MEM_LEVEL. For example: +# -DMAX_WBITS=11 -DMAX_MEM_LEVEL=3 +# If you wish to reduce the memory requirements (default 256K for big +# objects plus a few K), you can add to CFLAGS below: +# -DMAX_MEM_LEVEL=7 -DMAX_WBITS=14 +# See zconf.h for details about the memory requirements. + +# ------------ Turbo C 2.01, Turbo C++ 1.01 ------------ +MODEL=l +CC=tcc +LD=tcc +AR=tlib +# CFLAGS=-O2 -G -Z -m$(MODEL) -DMAX_WBITS=11 -DMAX_MEM_LEVEL=3 +CFLAGS=-O2 -G -Z -m$(MODEL) +LDFLAGS=-m$(MODEL) -f- + + +# variables +ZLIB_LIB = zlib_$(MODEL).lib + +OBJ1 = adler32.obj compress.obj crc32.obj deflate.obj gzio.obj infback.obj +OBJ2 = inffast.obj inflate.obj inftrees.obj trees.obj uncompr.obj zutil.obj +OBJP1 = +adler32.obj+compress.obj+crc32.obj+deflate.obj+gzio.obj+infback.obj +OBJP2 = +inffast.obj+inflate.obj+inftrees.obj+trees.obj+uncompr.obj+zutil.obj + + +# targets +all: $(ZLIB_LIB) example.exe minigzip.exe + +.c.obj: + $(CC) -c $(CFLAGS) $*.c + +adler32.obj: adler32.c zlib.h zconf.h + +compress.obj: compress.c zlib.h zconf.h + +crc32.obj: crc32.c zlib.h zconf.h crc32.h + +deflate.obj: deflate.c deflate.h zutil.h zlib.h zconf.h + +gzio.obj: gzio.c zutil.h zlib.h zconf.h + +infback.obj: infback.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inffast.obj: inffast.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h + +inflate.obj: inflate.c zutil.h zlib.h zconf.h inftrees.h 
inflate.h \ + inffast.h inffixed.h + +inftrees.obj: inftrees.c zutil.h zlib.h zconf.h inftrees.h + +trees.obj: trees.c zutil.h zlib.h zconf.h deflate.h trees.h + +uncompr.obj: uncompr.c zlib.h zconf.h + +zutil.obj: zutil.c zutil.h zlib.h zconf.h + +example.obj: example.c zlib.h zconf.h + +minigzip.obj: minigzip.c zlib.h zconf.h + + +# the command line is cut to fit in the MS-DOS 128 byte limit: +$(ZLIB_LIB): $(OBJ1) $(OBJ2) + -del $(ZLIB_LIB) + $(AR) $(ZLIB_LIB) $(OBJP1) + $(AR) $(ZLIB_LIB) $(OBJP2) + +example.exe: example.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) example.obj $(ZLIB_LIB) + +minigzip.exe: minigzip.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) minigzip.obj $(ZLIB_LIB) + +test: example.exe minigzip.exe + example + echo hello world | minigzip | minigzip -d + +clean: + -del *.obj + -del *.lib + -del *.exe + -del zlib_*.bak + -del foo.gz Added: external/zlib/old/Makefile.riscos ============================================================================== --- (empty file) +++ external/zlib/old/Makefile.riscos Tue Jan 3 07:42:59 2006 @@ -0,0 +1,151 @@ +# Project: zlib_1_03 +# Patched for zlib 1.1.2 rw at shadow.org.uk 19980430 +# test works out-of-the-box, installs `somewhere' on demand + +# Toolflags: +CCflags = -c -depend !Depend -IC: -g -throwback -DRISCOS -fah +C++flags = -c -depend !Depend -IC: -throwback +Linkflags = -aif -c++ -o $@ +ObjAsmflags = -throwback -NoCache -depend !Depend +CMHGflags = +LibFileflags = -c -l -o $@ +Squeezeflags = -o $@ + +# change the line below to where _you_ want the library installed. 
+libdest = lib:zlib + +# Final targets: +@.lib: @.o.adler32 @.o.compress @.o.crc32 @.o.deflate @.o.gzio \ + @.o.infblock @.o.infcodes @.o.inffast @.o.inflate @.o.inftrees @.o.infutil @.o.trees \ + @.o.uncompr @.o.zutil + LibFile $(LibFileflags) @.o.adler32 @.o.compress @.o.crc32 @.o.deflate \ + @.o.gzio @.o.infblock @.o.infcodes @.o.inffast @.o.inflate @.o.inftrees @.o.infutil \ + @.o.trees @.o.uncompr @.o.zutil +test: @.minigzip @.example @.lib + @copy @.lib @.libc A~C~DF~L~N~P~Q~RS~TV + @echo running tests: hang on. + @/@.minigzip -f -9 libc + @/@.minigzip -d libc-gz + @/@.minigzip -f -1 libc + @/@.minigzip -d libc-gz + @/@.minigzip -h -9 libc + @/@.minigzip -d libc-gz + @/@.minigzip -h -1 libc + @/@.minigzip -d libc-gz + @/@.minigzip -9 libc + @/@.minigzip -d libc-gz + @/@.minigzip -1 libc + @/@.minigzip -d libc-gz + @diff @.lib @.libc + @echo that should have reported '@.lib and @.libc identical' if you have diff. + @/@.example @.fred @.fred + @echo that will have given lots of hello!'s.
+ +@.minigzip: @.o.minigzip @.lib C:o.Stubs + Link $(Linkflags) @.o.minigzip @.lib C:o.Stubs +@.example: @.o.example @.lib C:o.Stubs + Link $(Linkflags) @.o.example @.lib C:o.Stubs + +install: @.lib + cdir $(libdest) + cdir $(libdest).h + @copy @.h.zlib $(libdest).h.zlib A~C~DF~L~N~P~Q~RS~TV + @copy @.h.zconf $(libdest).h.zconf A~C~DF~L~N~P~Q~RS~TV + @copy @.lib $(libdest).lib A~C~DF~L~N~P~Q~RS~TV + @echo okay, installed zlib in $(libdest) + +clean:; remove @.minigzip + remove @.example + remove @.libc + -wipe @.o.* F~r~cV + remove @.fred + +# User-editable dependencies: +.c.o: + cc $(ccflags) -o $@ $< + +# Static dependencies: + +# Dynamic dependencies: +o.example: c.example +o.example: h.zlib +o.example: h.zconf +o.minigzip: c.minigzip +o.minigzip: h.zlib +o.minigzip: h.zconf +o.adler32: c.adler32 +o.adler32: h.zlib +o.adler32: h.zconf +o.compress: c.compress +o.compress: h.zlib +o.compress: h.zconf +o.crc32: c.crc32 +o.crc32: h.zlib +o.crc32: h.zconf +o.deflate: c.deflate +o.deflate: h.deflate +o.deflate: h.zutil +o.deflate: h.zlib +o.deflate: h.zconf +o.gzio: c.gzio +o.gzio: h.zutil +o.gzio: h.zlib +o.gzio: h.zconf +o.infblock: c.infblock +o.infblock: h.zutil +o.infblock: h.zlib +o.infblock: h.zconf +o.infblock: h.infblock +o.infblock: h.inftrees +o.infblock: h.infcodes +o.infblock: h.infutil +o.infcodes: c.infcodes +o.infcodes: h.zutil +o.infcodes: h.zlib +o.infcodes: h.zconf +o.infcodes: h.inftrees +o.infcodes: h.infblock +o.infcodes: h.infcodes +o.infcodes: h.infutil +o.infcodes: h.inffast +o.inffast: c.inffast +o.inffast: h.zutil +o.inffast: h.zlib +o.inffast: h.zconf +o.inffast: h.inftrees +o.inffast: h.infblock +o.inffast: h.infcodes +o.inffast: h.infutil +o.inffast: h.inffast +o.inflate: c.inflate +o.inflate: h.zutil +o.inflate: h.zlib +o.inflate: h.zconf +o.inflate: h.infblock +o.inftrees: c.inftrees +o.inftrees: h.zutil +o.inftrees: h.zlib +o.inftrees: h.zconf +o.inftrees: h.inftrees +o.inftrees: h.inffixed +o.infutil: c.infutil +o.infutil:
h.zutil +o.infutil: h.zlib +o.infutil: h.zconf +o.infutil: h.infblock +o.infutil: h.inftrees +o.infutil: h.infcodes +o.infutil: h.infutil +o.trees: c.trees +o.trees: h.deflate +o.trees: h.zutil +o.trees: h.zlib +o.trees: h.zconf +o.trees: h.trees +o.uncompr: c.uncompr +o.uncompr: h.zlib +o.uncompr: h.zconf +o.zutil: c.zutil +o.zutil: h.zutil +o.zutil: h.zlib +o.zutil: h.zconf Added: external/zlib/old/README ============================================================================== --- (empty file) +++ external/zlib/old/README Tue Jan 3 07:42:59 2006 @@ -0,0 +1,3 @@ +This directory contains files that have not been updated for zlib 1.2.x + +(Volunteers are encouraged to help clean this up. Thanks.) Added: external/zlib/old/descrip.mms ============================================================================== --- (empty file) +++ external/zlib/old/descrip.mms Tue Jan 3 07:42:59 2006 @@ -0,0 +1,48 @@ +# descrip.mms: MMS description file for building zlib on VMS +# written by Martin P.J. Zinser + +cc_defs = +c_deb = + +.ifdef __DECC__ +pref = /prefix=all +.endif + +OBJS = adler32.obj, compress.obj, crc32.obj, gzio.obj, uncompr.obj,\ + deflate.obj, trees.obj, zutil.obj, inflate.obj, infblock.obj,\ + inftrees.obj, infcodes.obj, infutil.obj, inffast.obj + +CFLAGS= $(C_DEB) $(CC_DEFS) $(PREF) + +all : example.exe minigzip.exe + @ write sys$output " Example applications available" +libz.olb : libz.olb($(OBJS)) + @ write sys$output " libz available" + +example.exe : example.obj libz.olb + link example,libz.olb/lib + +minigzip.exe : minigzip.obj libz.olb + link minigzip,libz.olb/lib,x11vms:xvmsutils.olb/lib + +clean : + delete *.obj;*,libz.olb;* + + +# Other dependencies. 
+adler32.obj : zutil.h zlib.h zconf.h +compress.obj : zlib.h zconf.h +crc32.obj : zutil.h zlib.h zconf.h +deflate.obj : deflate.h zutil.h zlib.h zconf.h +example.obj : zlib.h zconf.h +gzio.obj : zutil.h zlib.h zconf.h +infblock.obj : zutil.h zlib.h zconf.h infblock.h inftrees.h infcodes.h infutil.h +infcodes.obj : zutil.h zlib.h zconf.h inftrees.h infutil.h infcodes.h inffast.h +inffast.obj : zutil.h zlib.h zconf.h inftrees.h infutil.h inffast.h +inflate.obj : zutil.h zlib.h zconf.h infblock.h +inftrees.obj : zutil.h zlib.h zconf.h inftrees.h +infutil.obj : zutil.h zlib.h zconf.h inftrees.h infutil.h +minigzip.obj : zlib.h zconf.h +trees.obj : deflate.h zutil.h zlib.h zconf.h +uncompr.obj : zlib.h zconf.h +zutil.obj : zutil.h zlib.h zconf.h Added: external/zlib/old/os2/Makefile.os2 ============================================================================== --- (empty file) +++ external/zlib/old/os2/Makefile.os2 Tue Jan 3 07:42:59 2006 @@ -0,0 +1,136 @@ +# Makefile for zlib under OS/2 using GCC (PGCC) +# For conditions of distribution and use, see copyright notice in zlib.h + +# To compile and test, type: +# cp Makefile.os2 .. +# cd .. +# make -f Makefile.os2 test + +# This makefile will build a static library z.lib, a shared library +# z.dll and a import library zdll.lib. You can use either z.lib or +# zdll.lib by specifying either -lz or -lzdll on gcc's command line + +CC=gcc -Zomf -s + +CFLAGS=-O6 -Wall +#CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7 +#CFLAGS=-g -DDEBUG +#CFLAGS=-O3 -Wall -Wwrite-strings -Wpointer-arith -Wconversion \ +# -Wstrict-prototypes -Wmissing-prototypes + +#################### BUG WARNING: ##################### +## infcodes.c hits a bug in pgcc-1.0, so you have to use either +## -O# where # <= 4 or one of (-fno-ommit-frame-pointer or -fno-force-mem) +## This bug is reportedly fixed in pgcc >1.0, but this was not tested +CFLAGS+=-fno-force-mem + +LDFLAGS=-s -L. 
-lzdll -Zcrtdll +LDSHARED=$(CC) -s -Zomf -Zdll -Zcrtdll + +VER=1.1.0 +ZLIB=z.lib +SHAREDLIB=z.dll +SHAREDLIBIMP=zdll.lib +LIBS=$(ZLIB) $(SHAREDLIB) $(SHAREDLIBIMP) + +AR=emxomfar cr +IMPLIB=emximp +RANLIB=echo +TAR=tar +SHELL=bash + +prefix=/usr/local +exec_prefix = $(prefix) + +OBJS = adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \ + zutil.o inflate.o infblock.o inftrees.o infcodes.o infutil.o inffast.o + +TEST_OBJS = example.o minigzip.o + +DISTFILES = README INDEX ChangeLog configure Make*[a-z0-9] *.[ch] descrip.mms \ + algorithm.txt zlib.3 msdos/Make*[a-z0-9] msdos/zlib.def msdos/zlib.rc \ + nt/Makefile.nt nt/zlib.dnt contrib/README.contrib contrib/*.txt \ + contrib/asm386/*.asm contrib/asm386/*.c \ + contrib/asm386/*.bat contrib/asm386/zlibvc.d?? contrib/iostream/*.cpp \ + contrib/iostream/*.h contrib/iostream2/*.h contrib/iostream2/*.cpp \ + contrib/untgz/Makefile contrib/untgz/*.c contrib/untgz/*.w32 + +all: example.exe minigzip.exe + +test: all + @LD_LIBRARY_PATH=.:$(LD_LIBRARY_PATH) ; export LD_LIBRARY_PATH; \ + echo hello world | ./minigzip | ./minigzip -d || \ + echo ' *** minigzip test FAILED ***' ; \ + if ./example; then \ + echo ' *** zlib test OK ***'; \ + else \ + echo ' *** zlib test FAILED ***'; \ + fi + +$(ZLIB): $(OBJS) + $(AR) $@ $(OBJS) + -@ ($(RANLIB) $@ || true) >/dev/null 2>&1 + +$(SHAREDLIB): $(OBJS) os2/z.def + $(LDSHARED) -o $@ $^ + +$(SHAREDLIBIMP): os2/z.def + $(IMPLIB) -o $@ $^ + +example.exe: example.o $(LIBS) + $(CC) $(CFLAGS) -o $@ example.o $(LDFLAGS) + +minigzip.exe: minigzip.o $(LIBS) + $(CC) $(CFLAGS) -o $@ minigzip.o $(LDFLAGS) + +clean: + rm -f *.o *~ example minigzip libz.a libz.so* foo.gz + +distclean: clean + +zip: + mv Makefile Makefile~; cp -p Makefile.in Makefile + rm -f test.c ztest*.c + v=`sed -n -e 's/\.//g' -e '/VERSION "/s/.*"\(.*\)".*/\1/p' < zlib.h`;\ + zip -ul9 zlib$$v $(DISTFILES) + mv Makefile~ Makefile + +dist: + mv Makefile Makefile~; cp -p Makefile.in Makefile + rm -f test.c ztest*.c + 
d=zlib-`sed -n '/VERSION "/s/.*"\(.*\)".*/\1/p' < zlib.h`;\ + rm -f $$d.tar.gz; \ + if test ! -d ../$$d; then rm -f ../$$d; ln -s `pwd` ../$$d; fi; \ + files=""; \ + for f in $(DISTFILES); do files="$$files $$d/$$f"; done; \ + cd ..; \ + GZIP=-9 $(TAR) chofz $$d/$$d.tar.gz $$files; \ + if test ! -d $$d; then rm -f $$d; fi + mv Makefile~ Makefile + +tags: + etags *.[ch] + +depend: + makedepend -- $(CFLAGS) -- *.[ch] + +# DO NOT DELETE THIS LINE -- make depend depends on it. + +adler32.o: zlib.h zconf.h +compress.o: zlib.h zconf.h +crc32.o: zlib.h zconf.h +deflate.o: deflate.h zutil.h zlib.h zconf.h +example.o: zlib.h zconf.h +gzio.o: zutil.h zlib.h zconf.h +infblock.o: infblock.h inftrees.h infcodes.h infutil.h zutil.h zlib.h zconf.h +infcodes.o: zutil.h zlib.h zconf.h +infcodes.o: inftrees.h infblock.h infcodes.h infutil.h inffast.h +inffast.o: zutil.h zlib.h zconf.h inftrees.h +inffast.o: infblock.h infcodes.h infutil.h inffast.h +inflate.o: zutil.h zlib.h zconf.h infblock.h +inftrees.o: zutil.h zlib.h zconf.h inftrees.h +infutil.o: zutil.h zlib.h zconf.h infblock.h inftrees.h infcodes.h infutil.h +minigzip.o: zlib.h zconf.h +trees.o: deflate.h zutil.h zlib.h zconf.h trees.h +uncompr.o: zlib.h zconf.h +zutil.o: zutil.h zlib.h zconf.h Added: external/zlib/old/os2/zlib.def ============================================================================== --- (empty file) +++ external/zlib/old/os2/zlib.def Tue Jan 3 07:42:59 2006 @@ -0,0 +1,51 @@ +; +; Slightly modified version of ../nt/zlib.dnt :-) +; + +LIBRARY Z +DESCRIPTION "Zlib compression library for OS/2" +CODE PRELOAD MOVEABLE DISCARDABLE +DATA PRELOAD MOVEABLE MULTIPLE + +EXPORTS + adler32 + compress + crc32 + deflate + deflateCopy + deflateEnd + deflateInit2_ + deflateInit_ + deflateParams + deflateReset + deflateSetDictionary + gzclose + gzdopen + gzerror + gzflush + gzopen + gzread + gzwrite + inflate + inflateEnd + inflateInit2_ + inflateInit_ + inflateReset + inflateSetDictionary + inflateSync + uncompress 
+ zlibVersion + gzprintf + gzputc + gzgetc + gzseek + gzrewind + gztell + gzeof + gzsetparams + zError + inflateSyncPoint + get_crc_table + compress2 + gzputs + gzgets Added: external/zlib/old/visual-basic.txt ============================================================================== --- (empty file) +++ external/zlib/old/visual-basic.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,160 @@ +See below some functions declarations for Visual Basic. + +Frequently Asked Question: + +Q: Each time I use the compress function I get the -5 error (not enough + room in the output buffer). + +A: Make sure that the length of the compressed buffer is passed by + reference ("as any"), not by value ("as long"). Also check that + before the call of compress this length is equal to the total size of + the compressed buffer and not zero. + + +From: "Jon Caruana" +Subject: Re: How to port zlib declares to vb? +Date: Mon, 28 Oct 1996 18:33:03 -0600 + +Got the answer! (I haven't had time to check this but it's what I got, and +looks correct): + +He has the following routines working: + compress + uncompress + gzopen + gzwrite + gzread + gzclose + +Declares follow: (Quoted from Carlos Rios , in Vb4 form) + +#If Win16 Then 'Use Win16 calls. 
+Declare Function compress Lib "ZLIB.DLL" (ByVal compr As + String, comprLen As Any, ByVal buf As String, ByVal buflen + As Long) As Integer +Declare Function uncompress Lib "ZLIB.DLL" (ByVal uncompr + As String, uncomprLen As Any, ByVal compr As String, ByVal + lcompr As Long) As Integer +Declare Function gzopen Lib "ZLIB.DLL" (ByVal filePath As + String, ByVal mode As String) As Long +Declare Function gzread Lib "ZLIB.DLL" (ByVal file As + Long, ByVal uncompr As String, ByVal uncomprLen As Integer) + As Integer +Declare Function gzwrite Lib "ZLIB.DLL" (ByVal file As + Long, ByVal uncompr As String, ByVal uncomprLen As Integer) + As Integer +Declare Function gzclose Lib "ZLIB.DLL" (ByVal file As + Long) As Integer +#Else +Declare Function compress Lib "ZLIB32.DLL" + (ByVal compr As String, comprLen As Any, ByVal buf As + String, ByVal buflen As Long) As Integer +Declare Function uncompress Lib "ZLIB32.DLL" + (ByVal uncompr As String, uncomprLen As Any, ByVal compr As + String, ByVal lcompr As Long) As Long +Declare Function gzopen Lib "ZLIB32.DLL" + (ByVal file As String, ByVal mode As String) As Long +Declare Function gzread Lib "ZLIB32.DLL" + (ByVal file As Long, ByVal uncompr As String, ByVal + uncomprLen As Long) As Long +Declare Function gzwrite Lib "ZLIB32.DLL" + (ByVal file As Long, ByVal uncompr As String, ByVal + uncomprLen As Long) As Long +Declare Function gzclose Lib "ZLIB32.DLL" + (ByVal file As Long) As Long +#End If + +-Jon Caruana +jon-net at usa.net +Microsoft Sitebuilder Network Level 1 Member - HTML Writer's Guild Member + + +Here is another example from Michael that he +says conforms to the VB guidelines, and that solves the problem of not +knowing the uncompressed size by storing it at the end of the file: + +'Calling the functions: +'bracket meaning: [optional] {Range of possible values} +'Call subCompressFile( [, , [level of compression {1..9}]]) +'Call subUncompressFile() + +Option Explicit +Private lngpvtPcnSml As Long 'Stores value for 
'lngPercentSmaller' +Private Const SUCCESS As Long = 0 +Private Const strFilExt As String = ".cpr" +Private Declare Function lngfncCpr Lib "zlib.dll" Alias "compress2" (ByRef +dest As Any, ByRef destLen As Any, ByRef src As Any, ByVal srcLen As Long, +ByVal level As Integer) As Long +Private Declare Function lngfncUcp Lib "zlib.dll" Alias "uncompress" (ByRef +dest As Any, ByRef destLen As Any, ByRef src As Any, ByVal srcLen As Long) +As Long + +Public Sub subCompressFile(ByVal strargOriFilPth As String, Optional ByVal +strargCprFilPth As String, Optional ByVal intLvl As Integer = 9) + Dim strCprPth As String + Dim lngOriSiz As Long + Dim lngCprSiz As Long + Dim bytaryOri() As Byte + Dim bytaryCpr() As Byte + lngOriSiz = FileLen(strargOriFilPth) + ReDim bytaryOri(lngOriSiz - 1) + Open strargOriFilPth For Binary Access Read As #1 + Get #1, , bytaryOri() + Close #1 + strCprPth = IIf(strargCprFilPth = "", strargOriFilPth, strargCprFilPth) +'Select file path and name + strCprPth = strCprPth & IIf(Right(strCprPth, Len(strFilExt)) = +strFilExt, "", strFilExt) 'Add file extension if not exists + lngCprSiz = (lngOriSiz * 1.01) + 12 'Compression needs temporary a bit +more space then original file size + ReDim bytaryCpr(lngCprSiz - 1) + If lngfncCpr(bytaryCpr(0), lngCprSiz, bytaryOri(0), lngOriSiz, intLvl) = +SUCCESS Then + lngpvtPcnSml = (1# - (lngCprSiz / lngOriSiz)) * 100 + ReDim Preserve bytaryCpr(lngCprSiz - 1) + Open strCprPth For Binary Access Write As #1 + Put #1, , bytaryCpr() + Put #1, , lngOriSiz 'Add the the original size value to the end +(last 4 bytes) + Close #1 + Else + MsgBox "Compression error" + End If + Erase bytaryCpr + Erase bytaryOri +End Sub + +Public Sub subUncompressFile(ByVal strargFilPth As String) + Dim bytaryCpr() As Byte + Dim bytaryOri() As Byte + Dim lngOriSiz As Long + Dim lngCprSiz As Long + Dim strOriPth As String + lngCprSiz = FileLen(strargFilPth) + ReDim bytaryCpr(lngCprSiz - 1) + Open strargFilPth For Binary Access Read As #1 + Get #1, 
, bytaryCpr() + Close #1 + 'Read the original file size value: + lngOriSiz = bytaryCpr(lngCprSiz - 1) * (2 ^ 24) _ + + bytaryCpr(lngCprSiz - 2) * (2 ^ 16) _ + + bytaryCpr(lngCprSiz - 3) * (2 ^ 8) _ + + bytaryCpr(lngCprSiz - 4) + ReDim Preserve bytaryCpr(lngCprSiz - 5) 'Cut of the original size value + ReDim bytaryOri(lngOriSiz - 1) + If lngfncUcp(bytaryOri(0), lngOriSiz, bytaryCpr(0), lngCprSiz) = SUCCESS +Then + strOriPth = Left(strargFilPth, Len(strargFilPth) - Len(strFilExt)) + Open strOriPth For Binary Access Write As #1 + Put #1, , bytaryOri() + Close #1 + Else + MsgBox "Uncompression error" + End If + Erase bytaryCpr + Erase bytaryOri +End Sub +Public Property Get lngPercentSmaller() As Long + lngPercentSmaller = lngpvtPcnSml +End Property Added: external/zlib/old/zlib.html ============================================================================== --- (empty file) +++ external/zlib/old/zlib.html Tue Jan 3 07:42:59 2006 @@ -0,0 +1,971 @@ + + + + zlib general purpose compression library version 1.1.4 + + + + + +

    zlib 1.1.4 Manual

    +
    +

    Contents

    +
      +
    1. Prologue +
    2. Introduction +
    3. Utility functions +
    4. Basic functions +
    5. Advanced functions +
    6. Constants +
    7. struct z_stream_s +
    8. Checksum functions +
    9. Misc +
    +
    +

    Prologue

    + 'zlib' general purpose compression library version 1.1.4, March 11th, 2002 +

    + Copyright (C) 1995-2002 Jean-loup Gailly and Mark Adler +

    + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. +

    + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: +

      +
    1. The origin of this software must not be misrepresented ; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +
    2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +
    3. This notice may not be removed or altered from any source distribution. +
    + +
    +
    Jean-loup Gailly +
    jloup at gzip.org +
    Mark Adler +
    madler at alumni.caltech.edu +
    + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files + + ftp://ds.internic.net/rfc/rfc1950.txt + (zlib format), + + rfc1951.txt + (deflate format) and + + rfc1952.txt + (gzip format). +

    + This manual is converted from zlib.h by + piaip +

    + Visit + http://ftp.cdrom.com/pub/infozip/zlib/ + for the official zlib web page. +

    + +


    +

    Introduction

    + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed + data. This version of the library supports only one compression method + (deflation) but other algorithms will be added later and will have the same + stream interface. +

    + + Compression can be done in a single step if the buffers are large + enough (for example if an input file is mmap'ed), or can be done by + repeated calls of the compression function. In the latter case, the + application must provide more input and/or consume the output + (providing more output space) before each call. +

    + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio. +

    + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never + crash even in case of corrupted input. +

    + +


    +

    Utility functions

    + The following utility functions are implemented on top of the +
    basic stream-oriented functions. + To simplify the interface, some + default options are assumed (compression level and memory usage, + standard memory allocation functions). The source code of these + utility functions can easily be modified if you need special options. +

    Function list

    +
      +
    • int compress (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen); +
    • int compress2 (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen, int level); +
    • int uncompress (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen); +
    • typedef voidp gzFile; +
    • gzFile gzopen (const char *path, const char *mode); +
    • gzFile gzdopen (int fd, const char *mode); +
    • int gzsetparams (gzFile file, int level, int strategy); +
    • int gzread (gzFile file, voidp buf, unsigned len); +
    • int gzwrite (gzFile file, const voidp buf, unsigned len); +
    • int VA gzprintf (gzFile file, const char *format, ...); +
    • int gzputs (gzFile file, const char *s); +
    • char * gzgets (gzFile file, char *buf, int len); +
    • int gzputc (gzFile file, int c); +
    • int gzgetc (gzFile file); +
    • int gzflush (gzFile file, int flush); +
    • z_off_t gzseek (gzFile file, z_off_t offset, int whence); +
    • z_off_t gztell (gzFile file); +
    • int gzrewind (gzFile file); +
    • int gzeof (gzFile file); +
    • int gzclose (gzFile file); +
    • const char * gzerror (gzFile file, int *errnum); +
    +

    Function description

    +
    +
    int compress (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen); +
    + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least 0.1% larger than + sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the + compressed buffer.

    + This function can be used to compress a whole file at once if the + input file is mmap'ed.

    + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer.

    + +

    int compress2 (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen, int level); +
    + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least 0.1% larger than sourceLen plus + 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. +

    + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +

    + +

    int uncompress (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen); +
    + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the uncompressed buffer.

    + This function can be used to decompress a whole file at once if the + input file is mmap'ed. +

    + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. +

    + +

    typedef voidp gzFile; +

    + +

    gzFile gzopen (const char *path, const char *mode); +
    + Opens a gzip (.gz) file for reading or writing. The mode parameter + is as in fopen ("rb" or "wb") but can also include a compression level + ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for + Huffman only compression as in "wb1h". (See the description + of deflateInit2 for more information about the strategy parameter.) +

    + + gzopen can be used to read a file which is not in gzip format ; in this + case gzread will directly read from the file without decompression. +

    + + gzopen returns NULL if the file could not be opened or if there was + insufficient memory to allocate the (de)compression state ; errno + can be checked to distinguish the two cases (if errno is zero, the + zlib error is Z_MEM_ERROR). +

    + +

    gzFile gzdopen (int fd, const char *mode); +
    + gzdopen() associates a gzFile with the file descriptor fd. File + descriptors are obtained from calls like open, dup, creat, pipe or + fileno (if the file has been previously opened with fopen). + The mode parameter is as in gzopen. +

    + The next call of gzclose on the returned gzFile will also close the + file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file + descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode). +

    + gzdopen returns NULL if there was insufficient memory to allocate + the (de)compression state. +

    + +

    int gzsetparams (gzFile file, int level, int strategy); +
    + Dynamically update the compression level or strategy. See the description + of deflateInit2 for the meaning of these parameters. +

    + gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not + opened for writing. +

    + +

    int gzread (gzFile file, voidp buf, unsigned len); +
    + Reads the given number of uncompressed bytes from the compressed file. + If the input file was not in gzip format, gzread copies the given number + of bytes into the buffer. +

    + gzread returns the number of uncompressed bytes actually read (0 for + end of file, -1 for error). +

    + +

    int gzwrite (gzFile file, const voidp buf, unsigned len); +
    + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes actually written + (0 in case of error). +

    + +

    int VA gzprintf (gzFile file, const char *format, ...); +
    + Converts, formats, and writes the args to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written (0 in case of error). +

    + +

    int gzputs (gzFile file, const char *s); +
    + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. +

    + gzputs returns the number of characters written, or -1 in case of error. +

    + +

    char * gzgets (gzFile file, char *buf, int len); +
    + Reads bytes from the compressed file until len-1 characters are read, or + a newline character is read and transferred to buf, or an end-of-file + condition is encountered. The string is then terminated with a null + character. +

    + gzgets returns buf, or Z_NULL in case of error. +

    + +

    int gzputc (gzFile file, int c); +
    + Writes c, converted to an unsigned char, into the compressed file. + gzputc returns the value that was written, or -1 in case of error. +

    + +

    int gzgetc (gzFile file); +
    + Reads one byte from the compressed file. gzgetc returns this byte + or -1 in case of end of file or error. +

    + +

    int gzflush (gzFile file, int flush); +
    + Flushes all pending output into the compressed file. The parameter + flush is as in the deflate() function. The return value is the zlib + error number (see function gzerror below). gzflush returns Z_OK if + the flush parameter is Z_FINISH and all output could be flushed. +

    + gzflush should be called only when strictly necessary because it can + degrade compression. +

    + +

    z_off_t gzseek (gzFile file, z_off_t offset, int whence); +
    + Sets the starting position for the next gzread or gzwrite on the + given compressed file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. +

    + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported ; gzseek then compresses a sequence of zeroes up to the new + starting position. +

    + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +

    + +

    int gzrewind (gzFile file); +
    + Rewinds the given file. This function is supported only for reading. +

    + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) +

    + +

    z_off_t gztell (gzFile file); +
    + Returns the starting position for the next gzread or gzwrite on the + given compressed file. This position represents a number of bytes in the + uncompressed data stream. +

    + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +

    + +

    int gzeof (gzFile file); +
    + Returns 1 when EOF has previously been detected reading the given + input stream, otherwise zero. +

    + +

    int gzclose (gzFile file); +
    + Flushes all pending output if necessary, closes the compressed file + and deallocates all the (de)compression state. The return value is the zlib + error number (see function gzerror below). +

    + +

    const char * gzerror (gzFile file, int *errnum); +
    + Returns the error message for the last error which occurred on the + given compressed file. errnum is set to zlib error number. If an + error occurred in the file system and not in the compression library, + errnum is set to Z_ERRNO and the application may consult errno + to get the exact error code. +

    +

    +
    +

    Basic functions

    +

    Function list

    +
    + +

    Function description

    +
    +
    const char * zlibVersion (void); +
    The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is + not compatible with the zlib.h header file used by the application. + This check is automatically made by deflateInit and inflateInit. +

    + +

    int deflateInit (z_streamp strm, int level); +
    + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. + If zalloc and zfree are set to Z_NULL, deflateInit updates them to + use default allocation functions. +

    + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at + all (the input data is simply copied a block at a time). +

    + + Z_DEFAULT_COMPRESSION requests a default compromise between speed and + compression (currently equivalent to level 6). +

    + + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if level is not a valid compression level, + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). + msg is set to null if there is no error message. deflateInit does not + perform any compression: this will be done by deflate(). +

    + +

    int deflate (z_streamp strm, int flush); +
    + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce some + output latency (reading input without producing any output) except when + forced to flush.

    + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + +

      +
    • Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + +
    • + Provide more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary (in interactive applications). + Some output may be provided even if flush is not set. +

    + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating avail_in or avail_out accordingly ; avail_out + should never be zero before the call. The application can consume the + compressed output when it wants, for example when the output buffer is full + (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK + and with zero avail_out, it must be called again after making room in the + output buffer because there might be more output pending. +

    + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In particular + avail_in is zero after the call if enough output space has been provided + before the call.) Flushing may degrade compression for some compression + algorithms and so it should be used only when necessary. +

    + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + the compression. +

    + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). +

    + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there + was enough output space ; if deflate returns with Z_OK, this function must be + called again with Z_FINISH and more output space (updated avail_out) but no + more input data, until it returns with Z_STREAM_END or an error. After + deflate has returned Z_STREAM_END, the only possible operations on the + stream are deflateReset or deflateEnd. +

    + + Z_FINISH can be used immediately after deflateInit if all the compression + is to be done in a single step. In this case, avail_out must be at least + 0.1% larger than avail_in plus 12 bytes. If deflate does not return + Z_STREAM_END, then it must be called again as described above. +

    + + deflate() sets strm->adler to the adler32 checksum of all input read + so far (that is, total_in bytes). +

    + + deflate() may update data_type if it can make a good guess about + the input data type (Z_ASCII or Z_BINARY). In doubt, the data is considered + binary. This field is only for information purposes and does not affect + the compression algorithm in any manner. +

    + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible + (for example avail_in or avail_out was zero). +

    + +

    int deflateEnd (z_streamp strm); +
    + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. +

    + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, + msg may be set but then points to a static string (which must not be + deallocated). +

    + +

    int inflateInit (z_streamp strm); +
    + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. If next_in is not Z_NULL and avail_in is large enough (the exact + value depends on the compression method), inflateInit determines the + compression method from the zlib header and allocates all data structures + accordingly ; otherwise the allocation will be deferred to the first call of + inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to + use default allocation functions. +

    + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller. msg is set to null if there is no error + message. inflateInit does not perform any decompression apart from reading + the zlib header if present: this will be done by inflate(). (So next_in and + avail_in may be modified, but next_out and avail_out are unchanged.) +

    + +

    int inflate (z_streamp strm, int flush); +
    + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may + introduce some output latency (reading input without producing any output) + except when forced to flush. +

    + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + +

      +
    • Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in is updated and processing + will resume at this point for the next call of inflate(). + +
    • Provide more output starting at next_out and update next_out and + avail_out accordingly. inflate() provides as much output as possible, + until there is no more input data or no more space in the output buffer + (see below about the flush parameter). +

    + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating the next_* and avail_* values accordingly. + The application can consume the uncompressed output when it wants, for + example when the output buffer is full (avail_out == 0), or after each + call of inflate(). If inflate returns Z_OK and with zero avail_out, it + must be called again after making room in the output buffer because there + might be more output pending. +

    + + If the parameter flush is set to Z_SYNC_FLUSH, inflate flushes as much + output as possible to the output buffer. The flushing behavior of inflate is + not specified for values of the flush parameter other than Z_SYNC_FLUSH + and Z_FINISH, but the current implementation actually flushes as much output + as possible anyway. +

    + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step + (a single call of inflate), the parameter flush should be set to + Z_FINISH. In this case all pending input is processed and all pending + output is flushed ; avail_out must be large enough to hold all the + uncompressed data. (The size of the uncompressed data may have been saved + by the compressor for this purpose.) The next operation on this stream must + be inflateEnd to deallocate the decompression state. The use of Z_FINISH + is never required, but can be used to inform inflate that a faster routine + may be used for the single inflate() call. +

    + + If a preset dictionary is needed at this point (see inflateSetDictionary + below), inflate sets strm->adler to the adler32 checksum of the + dictionary chosen by the compressor and returns Z_NEED_DICT ; otherwise + it sets strm->adler to the adler32 checksum of all output produced + so far (that is, total_out bytes) and returns Z_OK, Z_STREAM_END or + an error code as described below. At the end of the stream, inflate() + checks that its computed adler32 checksum is equal to that saved by the + compressor and returns Z_STREAM_END only if the checksum is correct. +

    + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect + adler32 checksum), Z_STREAM_ERROR if the stream structure was inconsistent + (for example if next_in or next_out was NULL), Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if no progress is possible or if there was not + enough room in the output buffer when Z_FINISH is used. In the Z_DATA_ERROR + case, the application may then call inflateSync to look for a good + compression block. +

    + +

    int inflateEnd (z_streamp strm); +
    + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. +

    + + inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state + was inconsistent. In the error case, msg may be set but then points to a + static string (which must not be deallocated). +

    +
    +

    Advanced functions

    + The following functions are needed only in some special applications. +

    Function list

    +
    +

    Function description

    +
    +
    int deflateInit2 (z_streamp strm, int level, int method, int windowBits, int memLevel, int strategy); + +
    This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by + the caller.

    + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library.

    + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead.

    + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but + is slow and reduces compression ratio ; memLevel=9 uses maximum memory + for optimal speed. The default value is 8. See zconf.h for total memory + usage as a function of windowBits and memLevel.

    + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), or Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match). Filtered data consists mostly of small values with a + somewhat random distribution. In this case, the compression algorithm is + tuned to compress them better. The effect of Z_FILTERED is to force more + Huffman coding and less string matching ; it is somewhat intermediate + between Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. The strategy parameter only affects + the compression ratio but not the correctness of the compressed output even + if it is not set appropriately.

    + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid + method). msg is set to null if there is no error message. deflateInit2 does + not perform any compression: this will be done by deflate().

    + +

    int deflateSetDictionary (z_streamp strm, const Bytef *dictionary, uInt dictLength); +
    + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. This function must be called + immediately after deflateInit, deflateInit2 or deflateReset, before any + call of deflate. The compressor and decompressor must use exactly the same + dictionary (see inflateSetDictionary).

    + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy ; the data can then be compressed better than + with the default empty dictionary.

    + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size in + deflateInit or deflateInit2. Thus the strings most likely to be useful should be + put at the end of the dictionary, not at the front.

    + + Upon return of this function, strm->adler is set to the Adler32 value + of the dictionary ; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The Adler32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.)

    + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (such as NULL dictionary) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if the compression method is bsort). deflateSetDictionary does not + perform any compression: this will be done by deflate().

    + +

    int deflateCopy (z_streamp dest, z_streamp source); +
    + Sets the destination stream as a complete copy of the source stream.

    + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and + can consume lots of memory.

    + + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being NULL). msg is left unchanged in both source and + destination.

    + +

    int deflateReset (z_streamp strm); +
    This function is equivalent to deflateEnd followed by deflateInit, + but does not free and reallocate all the internal compression state. + The stream will keep the same compression level and any other attributes + that may have been set by deflateInit2.

    + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being NULL).

    + +

    int deflateParams (z_streamp strm, int level, int strategy); +
    + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2. This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different + strategy. If the compression level is changed, the input available so far + is compressed with the old level (and may be flushed); the new level will + take effect only at the next call of deflate().

    + + Before the call of deflateParams, the stream state must be set as for + a call of deflate(), since the currently available input may have to + be compressed and flushed. In particular, strm->avail_out must be + non-zero.

    + + deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source + stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR + if strm->avail_out was zero.

    + +

    int inflateInit2 (z_streamp strm, int windowBits); + +
    This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller.

    + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. If a compressed stream with a larger window size is given as + input, inflate() will return with the error code Z_DATA_ERROR instead of + trying to allocate a larger window.

    + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if a parameter is invalid (such as a windowBits + value outside the supported range). msg is set to null if there is no error message. inflateInit2 + does not perform any decompression apart from reading the zlib header if + present: this will be done by inflate(). (So next_in and avail_in may be + modified, but next_out and avail_out are unchanged.)

    + +

    int inflateSetDictionary (z_streamp strm, const Bytef *dictionary, uInt dictLength); +
    + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate + if this call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the Adler32 value returned by this call of + inflate. The compressor and decompressor must use exactly the same + dictionary (see deflateSetDictionary).

    + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (such as NULL dictionary) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect Adler32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate().

    + +

    int inflateSync (z_streamp strm); + +
    Skips invalid compressed data until a full flush point (see the + description of deflate with Z_FULL_FLUSH above) can be found, or until all + available input is skipped. No output is provided.

    + + inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR + if no more input was provided, Z_DATA_ERROR if no flush point has been found, + or Z_STREAM_ERROR if the stream structure was inconsistent. In the success + case, the application may save the current value of total_in which + indicates where valid compressed data was found. In the error case, the + application may repeatedly call inflateSync, providing more input each time, + until success or end of the input data.

    + +

    int inflateReset (z_streamp strm); +
    + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate all the internal decompression state. + The stream will keep attributes that may have been set by inflateInit2. +

    + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being NULL). +

    +

    + +
    +

    Checksum functions

    + These functions are not related to compression but are exported + anyway because they might be useful in applications using the + compression library. +

    Function list

    +
    +

    Function description

    +
    +
    uLong adler32 (uLong adler, const Bytef *buf, uInt len); +
    + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is NULL, this function returns + the required initial value for the checksum. +

    + An Adler-32 checksum is almost as reliable as a CRC32 but can be computed + much faster. Usage example: +

    +
    +     uLong adler = adler32(0L, Z_NULL, 0);
    +
    +     while (read_buffer(buffer, length) != EOF) {
    +       adler = adler32(adler, buffer, length);
    +     }
    +     if (adler != original_adler) error();
    +   
    + +
    uLong crc32 (uLong crc, const Bytef *buf, uInt len); +
    + Update a running crc with the bytes buf[0..len-1] and return the updated + crc. If buf is NULL, this function returns the required initial value + for the crc. Pre- and post-conditioning (one's complement) is performed + within this function so it shouldn't be done by the application. + Usage example: +
    +
    +     uLong crc = crc32(0L, Z_NULL, 0);
    +
    +     while (read_buffer(buffer, length) != EOF) {
    +       crc = crc32(crc, buffer, length);
    +     }
    +     if (crc != original_crc) error();
    +   
    +
    +
    +

    struct z_stream_s

    + +
    +
    +typedef struct z_stream_s {
    +    Bytef    *next_in;  /* next input byte */
    +    uInt     avail_in;  /* number of bytes available at next_in */
    +    uLong    total_in;  /* total nb of input bytes read so far */
    +
    +    Bytef    *next_out; /* next output byte should be put there */
    +    uInt     avail_out; /* remaining free space at next_out */
    +    uLong    total_out; /* total nb of bytes output so far */
    +
    +    char     *msg;      /* last error message, NULL if no error */
    +    struct internal_state FAR *state; /* not visible by applications */
    +
    +    alloc_func zalloc;  /* used to allocate the internal state */
    +    free_func  zfree;   /* used to free the internal state */
    +    voidpf     opaque;  /* private data object passed to zalloc and zfree */
    +
    +    int     data_type;  /* best guess about the data type: ascii or binary */
    +    uLong   adler;      /* adler32 value of the uncompressed data */
    +    uLong   reserved;   /* reserved for future use */
    +} z_stream ;
    +
    +typedef z_stream FAR * z_streamp;
    +
    +
    + The application must update next_in and avail_in when avail_in has + dropped to zero. It must update next_out and avail_out when avail_out + has dropped to zero. The application must initialize zalloc, zfree and + opaque before calling the init function. All other fields are set by the + compression library and must not be updated by the application.

    + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value.

    + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe.

    + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this + if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, + pointers returned by zalloc for objects of exactly 65536 bytes *must* + have their offset normalized to zero. The default allocation function + provided by this library ensures this (see zutil.c). To reduce memory + requirements and avoid any allocation of 64K objects, at the expense of + compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). +

    + + The fields total_in and total_out can be used for statistics or + progress reports. After compression, total_in holds the total size of + the uncompressed data and may be saved for use in the decompressor + (particularly if the decompressor wants to decompress everything in + a single step).

    + +


    +

    Constants

    + +
    +#define Z_NO_FLUSH      0
    +#define Z_PARTIAL_FLUSH 1
    +	/* will be removed, use Z_SYNC_FLUSH instead */
    +#define Z_SYNC_FLUSH    2
    +#define Z_FULL_FLUSH    3
    +#define Z_FINISH        4
    +/* Allowed flush values ; see deflate() below for details */
    +
    +#define Z_OK            0
    +#define Z_STREAM_END    1
    +#define Z_NEED_DICT     2
    +#define Z_ERRNO        (-1)
    +#define Z_STREAM_ERROR (-2)
    +#define Z_DATA_ERROR   (-3)
    +#define Z_MEM_ERROR    (-4)
    +#define Z_BUF_ERROR    (-5)
    +#define Z_VERSION_ERROR (-6)
    +/* Return codes for the compression/decompression functions. Negative
    + * values are errors, positive values are used for special but normal events.
    + */
    +
    +#define Z_NO_COMPRESSION         0
    +#define Z_BEST_SPEED             1
    +#define Z_BEST_COMPRESSION       9
    +#define Z_DEFAULT_COMPRESSION  (-1)
    +/* compression levels */
    +
    +#define Z_FILTERED            1
    +#define Z_HUFFMAN_ONLY        2
    +#define Z_DEFAULT_STRATEGY    0
    +/* compression strategy ; see deflateInit2() below for details */
    +
    +#define Z_BINARY   0
    +#define Z_ASCII    1
    +#define Z_UNKNOWN  2
    +/* Possible values of the data_type field */
    +
    +#define Z_DEFLATED   8
    +/* The deflate compression method (the only one supported in this version) */
    +
    +#define Z_NULL  0  /* for initializing zalloc, zfree, opaque */
    +
    +#define zlib_version zlibVersion()
    +/* for compatibility with versions less than 1.0.2 */
    +
    +
    + +
    +

    Misc

    +
    deflateInit and inflateInit are macros to allow checking the zlib version + and the compiler's view of z_stream. +

    + Other functions: +

    +
    const char * zError (int err); +
    int inflateSyncPoint (z_streamp z); +
    const uLongf * get_crc_table (void); +
    +
    + + Last update: Wed Oct 13 20:42:34 1999
    + piapi at csie.ntu.edu.tw +
    + + + Added: external/zlib/projects/README.projects ============================================================================== --- (empty file) +++ external/zlib/projects/README.projects Tue Jan 3 07:42:59 2006 @@ -0,0 +1,41 @@ +This directory contains project files for building zlib under various +Integrated Development Environments (IDE). + +If you wish to submit a new project to this directory, you should comply +with the following requirements. Otherwise (e.g. if you wish to integrate +a custom piece of code that changes the zlib interface or its behavior), +please consider submitting the project to the contrib directory. + + +Requirements +============ + +- The project must build zlib using the source files from the official + zlib source distribution, exclusively. + +- If the project produces redistributable builds (e.g. shared objects + or DLL files), these builds must be compatible with those produced by + makefiles, if such makefiles exist in the zlib distribution. + In particular, if the project produces a DLL build for the Win32 + platform, this build must comply with the officially-amended Win32 DLL + Application Binary Interface (ABI), described in win32/DLL_FAQ.txt. + +- The project may provide additional build targets, which depend on + 3rd-party (unofficially-supported) software, present in the contrib + directory. For example, it is possible to provide an "ASM build", + besides the officially-supported build, and have ASM source files + among its dependencies. + +- If there are significant differences between the project files created + by different versions of an IDE (e.g. Visual C++ 6.0 vs. 7.0), the name + of the project directory should contain the version number of the IDE + for which the project is intended (e.g. "visualc6" for Visual C++ 6.0, + or "visualc7" for Visual C++ 7.0 and 7.1). 
+ + +Current projects +================ + +visualc6/ by Simon-Pierre Cadieux + and Cosmin Truta + Project for Microsoft Visual C++ 6.0 Added: external/zlib/projects/visualc6/README.txt ============================================================================== --- (empty file) +++ external/zlib/projects/visualc6/README.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,73 @@ +Microsoft Developer Studio Project Files, Format Version 6.00 for zlib. + +Copyright (C) 2000-2004 Simon-Pierre Cadieux. +Copyright (C) 2004 Cosmin Truta. +For conditions of distribution and use, see copyright notice in zlib.h. + + +This project builds the zlib binaries as follows: + +* Win32_DLL_Release\zlib1.dll DLL build +* Win32_DLL_Debug\zlib1d.dll DLL build (debug version) +* Win32_DLL_ASM_Release\zlib1.dll DLL build using ASM code +* Win32_DLL_ASM_Debug\zlib1d.dll DLL build using ASM code (debug version) +* Win32_LIB_Release\zlib.lib static build +* Win32_LIB_Debug\zlibd.lib static build (debug version) +* Win32_LIB_ASM_Release\zlib.lib static build using ASM code +* Win32_LIB_ASM_Debug\zlibd.lib static build using ASM code (debug version) + + +For more information regarding the DLL builds, please see the DLL FAQ +in ..\..\win32\DLL_FAQ.txt. + + +To build and test: + +1) On the main menu, select "File | Open Workspace". + Open "zlib.dsw". + +2) Select "Build | Set Active Configuration". + Choose the configuration you wish to build. + +3) Select "Build | Clean". + +4) Select "Build | Build ... (F7)". Ignore warning messages about + not being able to find certain include files (e.g. alloc.h). + +5) If you built one of the sample programs (example or minigzip), + select "Build | Execute ... (Ctrl+F5)". + + +To use: + +1) Select "Project | Settings (Alt+F7)". + Make note of the configuration names used in your project. + Usually, these names are "Win32 Release" and "Win32 Debug". + +2) In the Workspace window, select the "FileView" tab. + Right-click on the root item "Workspace '...'". 
+ Select "Insert Project into Workspace". + Switch on the checkbox "Dependency of:", and select the name + of your project. Open "zlib.dsp". + +3) Select "Build | Configurations". + For each configuration of your project: + 3.1) Choose the zlib configuration you wish to use. + 3.2) Click on "Add". + 3.3) Set the new zlib configuration name to the name used by + the configuration from the current iteration. + +4) Select "Build | Set Active Configuration". + Choose the configuration you wish to build. + +5) Select "Build | Build ... (F7)". + +6) If you built an executable program, select + "Build | Execute ... (Ctrl+F5)". + + +Note: + +To build the ASM-enabled code, you need Microsoft Assembler +(ML.EXE). You can get it by downloading and installing the +latest Processor Pack for Visual C++ 6.0. Added: external/zlib/projects/visualc6/example.dsp ============================================================================== --- (empty file) +++ external/zlib/projects/visualc6/example.dsp Tue Jan 3 07:42:59 2006 @@ -0,0 +1,278 @@ +# Microsoft Developer Studio Project File - Name="example" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** DO NOT EDIT ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=example - Win32 LIB Debug +!MESSAGE This is not a valid makefile. To build this project using NMAKE, +!MESSAGE use the Export Makefile command and run +!MESSAGE +!MESSAGE NMAKE /f "example.mak". +!MESSAGE +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. 
For example: +!MESSAGE +!MESSAGE NMAKE /f "example.mak" CFG="example - Win32 LIB Debug" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "example - Win32 DLL Release" (based on "Win32 (x86) Console Application") +!MESSAGE "example - Win32 DLL Debug" (based on "Win32 (x86) Console Application") +!MESSAGE "example - Win32 DLL ASM Release" (based on "Win32 (x86) Console Application") +!MESSAGE "example - Win32 DLL ASM Debug" (based on "Win32 (x86) Console Application") +!MESSAGE "example - Win32 LIB Release" (based on "Win32 (x86) Console Application") +!MESSAGE "example - Win32 LIB Debug" (based on "Win32 (x86) Console Application") +!MESSAGE "example - Win32 LIB ASM Release" (based on "Win32 (x86) Console Application") +!MESSAGE "example - Win32 LIB ASM Debug" (based on "Win32 (x86) Console Application") +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "example - Win32 DLL Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "example___Win32_DLL_Release" +# PROP BASE Intermediate_Dir "example___Win32_DLL_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_DLL_Release" +# PROP Intermediate_Dir "Win32_DLL_Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 /nologo /subsystem:console /machine:I386 + 
+!ELSEIF "$(CFG)" == "example - Win32 DLL Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "example___Win32_DLL_Debug" +# PROP BASE Intermediate_Dir "example___Win32_DLL_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_DLL_Debug" +# PROP Intermediate_Dir "Win32_DLL_Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept + +!ELSEIF "$(CFG)" == "example - Win32 DLL ASM Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "example___Win32_DLL_ASM_Release" +# PROP BASE Intermediate_Dir "example___Win32_DLL_ASM_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_DLL_ASM_Release" +# PROP Intermediate_Dir "Win32_DLL_ASM_Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib 
odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 /nologo /subsystem:console /machine:I386 + +!ELSEIF "$(CFG)" == "example - Win32 DLL ASM Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "example___Win32_DLL_ASM_Debug" +# PROP BASE Intermediate_Dir "example___Win32_DLL_ASM_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_DLL_ASM_Debug" +# PROP Intermediate_Dir "Win32_DLL_ASM_Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept + +!ELSEIF "$(CFG)" == "example - Win32 LIB Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "example___Win32_LIB_Release" +# PROP BASE Intermediate_Dir "example___Win32_LIB_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_LIB_Release" +# PROP Intermediate_Dir "Win32_LIB_Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib 
user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 /nologo /subsystem:console /machine:I386 + +!ELSEIF "$(CFG)" == "example - Win32 LIB Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "example___Win32_LIB_Debug" +# PROP BASE Intermediate_Dir "example___Win32_LIB_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_LIB_Debug" +# PROP Intermediate_Dir "Win32_LIB_Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept + +!ELSEIF "$(CFG)" == "example - Win32 LIB ASM Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "example___Win32_LIB_ASM_Release" +# PROP BASE Intermediate_Dir "example___Win32_LIB_ASM_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_LIB_ASM_Release" +# PROP Intermediate_Dir "Win32_LIB_ASM_Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" 
+BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 /nologo /subsystem:console /machine:I386 + +!ELSEIF "$(CFG)" == "example - Win32 LIB ASM Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "example___Win32_LIB_ASM_Debug" +# PROP BASE Intermediate_Dir "example___Win32_LIB_ASM_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_LIB_ASM_Debug" +# PROP Intermediate_Dir "Win32_LIB_ASM_Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept + +!ENDIF + +# Begin Target + +# Name "example - Win32 DLL Release" +# Name "example - Win32 DLL Debug" +# Name "example - Win32 DLL ASM Release" +# Name "example - Win32 DLL ASM Debug" +# Name "example - Win32 LIB Release" +# Name "example - Win32 LIB Debug" +# Name "example - Win32 LIB ASM Release" +# Name "example - Win32 LIB ASM Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=..\..\example.c +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP 
Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=..\..\zconf.h +# End Source File +# Begin Source File + +SOURCE=..\..\zlib.h +# End Source File +# End Group +# End Target +# End Project Added: external/zlib/projects/visualc6/minigzip.dsp ============================================================================== --- (empty file) +++ external/zlib/projects/visualc6/minigzip.dsp Tue Jan 3 07:42:59 2006 @@ -0,0 +1,278 @@ +# Microsoft Developer Studio Project File - Name="minigzip" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** DO NOT EDIT ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=minigzip - Win32 LIB Debug +!MESSAGE This is not a valid makefile. To build this project using NMAKE, +!MESSAGE use the Export Makefile command and run +!MESSAGE +!MESSAGE NMAKE /f "minigzip.mak". +!MESSAGE +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. 
For example: +!MESSAGE +!MESSAGE NMAKE /f "minigzip.mak" CFG="minigzip - Win32 LIB Debug" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "minigzip - Win32 DLL Release" (based on "Win32 (x86) Console Application") +!MESSAGE "minigzip - Win32 DLL Debug" (based on "Win32 (x86) Console Application") +!MESSAGE "minigzip - Win32 DLL ASM Release" (based on "Win32 (x86) Console Application") +!MESSAGE "minigzip - Win32 DLL ASM Debug" (based on "Win32 (x86) Console Application") +!MESSAGE "minigzip - Win32 LIB Release" (based on "Win32 (x86) Console Application") +!MESSAGE "minigzip - Win32 LIB Debug" (based on "Win32 (x86) Console Application") +!MESSAGE "minigzip - Win32 LIB ASM Release" (based on "Win32 (x86) Console Application") +!MESSAGE "minigzip - Win32 LIB ASM Debug" (based on "Win32 (x86) Console Application") +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "minigzip - Win32 DLL Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "minigzip___Win32_DLL_Release" +# PROP BASE Intermediate_Dir "minigzip___Win32_DLL_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_DLL_Release" +# PROP Intermediate_Dir "Win32_DLL_Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 /nologo /subsystem:console 
/machine:I386 + +!ELSEIF "$(CFG)" == "minigzip - Win32 DLL Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "minigzip___Win32_DLL_Debug" +# PROP BASE Intermediate_Dir "minigzip___Win32_DLL_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_DLL_Debug" +# PROP Intermediate_Dir "Win32_DLL_Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept + +!ELSEIF "$(CFG)" == "minigzip - Win32 DLL ASM Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "minigzip___Win32_DLL_ASM_Release" +# PROP BASE Intermediate_Dir "minigzip___Win32_DLL_ASM_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_DLL_ASM_Release" +# PROP Intermediate_Dir "Win32_DLL_ASM_Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib 
oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 /nologo /subsystem:console /machine:I386 + +!ELSEIF "$(CFG)" == "minigzip - Win32 DLL ASM Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "minigzip___Win32_DLL_ASM_Debug" +# PROP BASE Intermediate_Dir "minigzip___Win32_DLL_ASM_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_DLL_ASM_Debug" +# PROP Intermediate_Dir "Win32_DLL_ASM_Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept + +!ELSEIF "$(CFG)" == "minigzip - Win32 LIB Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "minigzip___Win32_LIB_Release" +# PROP BASE Intermediate_Dir "minigzip___Win32_LIB_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_LIB_Release" +# PROP Intermediate_Dir "Win32_LIB_Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo 
+LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 /nologo /subsystem:console /machine:I386 + +!ELSEIF "$(CFG)" == "minigzip - Win32 LIB Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "minigzip___Win32_LIB_Debug" +# PROP BASE Intermediate_Dir "minigzip___Win32_LIB_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_LIB_Debug" +# PROP Intermediate_Dir "Win32_LIB_Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept + +!ELSEIF "$(CFG)" == "minigzip - Win32 LIB ASM Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "minigzip___Win32_LIB_ASM_Release" +# PROP BASE Intermediate_Dir "minigzip___Win32_LIB_ASM_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_LIB_ASM_Release" +# PROP Intermediate_Dir "Win32_LIB_ASM_Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 
0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 /nologo /subsystem:console /machine:I386 + +!ELSEIF "$(CFG)" == "minigzip - Win32 LIB ASM Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "minigzip___Win32_LIB_ASM_Debug" +# PROP BASE Intermediate_Dir "minigzip___Win32_LIB_ASM_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_LIB_ASM_Debug" +# PROP Intermediate_Dir "Win32_LIB_ASM_Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept + +!ENDIF + +# Begin Target + +# Name "minigzip - Win32 DLL Release" +# Name "minigzip - Win32 DLL Debug" +# Name "minigzip - Win32 DLL ASM Release" +# Name "minigzip - Win32 DLL ASM Debug" +# Name "minigzip - Win32 LIB Release" +# Name "minigzip - Win32 LIB Debug" +# Name "minigzip - Win32 LIB ASM Release" +# Name "minigzip - Win32 LIB ASM Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=..\..\minigzip.c +# End Source File +# 
End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=..\..\zconf.h +# End Source File +# Begin Source File + +SOURCE=..\..\zlib.h +# End Source File +# End Group +# End Target +# End Project Added: external/zlib/projects/visualc6/zlib.dsp ============================================================================== --- (empty file) +++ external/zlib/projects/visualc6/zlib.dsp Tue Jan 3 07:42:59 2006 @@ -0,0 +1,609 @@ +# Microsoft Developer Studio Project File - Name="zlib" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** DO NOT EDIT ** + +# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102 +# TARGTYPE "Win32 (x86) Static Library" 0x0104 + +CFG=zlib - Win32 LIB Debug +!MESSAGE This is not a valid makefile. To build this project using NMAKE, +!MESSAGE use the Export Makefile command and run +!MESSAGE +!MESSAGE NMAKE /f "zlib.mak". +!MESSAGE +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. 
For example: +!MESSAGE +!MESSAGE NMAKE /f "zlib.mak" CFG="zlib - Win32 LIB Debug" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "zlib - Win32 DLL Release" (based on "Win32 (x86) Dynamic-Link Library") +!MESSAGE "zlib - Win32 DLL Debug" (based on "Win32 (x86) Dynamic-Link Library") +!MESSAGE "zlib - Win32 DLL ASM Release" (based on "Win32 (x86) Dynamic-Link Library") +!MESSAGE "zlib - Win32 DLL ASM Debug" (based on "Win32 (x86) Dynamic-Link Library") +!MESSAGE "zlib - Win32 LIB Release" (based on "Win32 (x86) Static Library") +!MESSAGE "zlib - Win32 LIB Debug" (based on "Win32 (x86) Static Library") +!MESSAGE "zlib - Win32 LIB ASM Release" (based on "Win32 (x86) Static Library") +!MESSAGE "zlib - Win32 LIB ASM Debug" (based on "Win32 (x86) Static Library") +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" + +!IF "$(CFG)" == "zlib - Win32 DLL Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "zlib___Win32_DLL_Release" +# PROP BASE Intermediate_Dir "zlib___Win32_DLL_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_DLL_Release" +# PROP Intermediate_Dir "Win32_DLL_Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +CPP=cl.exe +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT BASE CPP /YX /Yc /Yu +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT CPP /YX /Yc /Yu +MTL=midl.exe +# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32 +# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 +RSC=rc.exe +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll 
/machine:I386 +# ADD LINK32 /nologo /dll /machine:I386 /out:"Win32_DLL_Release\zlib1.dll" + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "zlib___Win32_DLL_Debug" +# PROP BASE Intermediate_Dir "zlib___Win32_DLL_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_DLL_Debug" +# PROP Intermediate_Dir "Win32_DLL_Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +CPP=cl.exe +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX /Yc /Yu +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT CPP /YX /Yc /Yu +MTL=midl.exe +# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32 +# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32 +RSC=rc.exe +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept +# ADD LINK32 /nologo /dll /debug /machine:I386 /out:"Win32_DLL_Debug\zlib1d.dll" /pdbtype:sept + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL ASM Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "zlib___Win32_DLL_ASM_Release" +# PROP BASE Intermediate_Dir "zlib___Win32_DLL_ASM_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_DLL_ASM_Release" +# PROP Intermediate_Dir "Win32_DLL_ASM_Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +CPP=cl.exe +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT BASE CPP /YX /Yc /Yu +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /D "ASMV" /D "ASMINF" /FD /c +# SUBTRACT CPP /YX /Yc /Yu 
+MTL=midl.exe +# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32 +# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 +RSC=rc.exe +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386 +# ADD LINK32 /nologo /dll /machine:I386 /out:"Win32_DLL_ASM_Release\zlib1.dll" + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL ASM Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "zlib___Win32_DLL_ASM_Debug" +# PROP BASE Intermediate_Dir "zlib___Win32_DLL_ASM_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_DLL_ASM_Debug" +# PROP Intermediate_Dir "Win32_DLL_ASM_Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +CPP=cl.exe +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX /Yc /Yu +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "ASMV" /D "ASMINF" /FD /GZ /c +# SUBTRACT CPP /YX /Yc /Yu +MTL=midl.exe +# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32 +# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32 +RSC=rc.exe +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept +# ADD LINK32 /nologo /dll /debug /machine:I386 /out:"Win32_DLL_ASM_Debug\zlib1d.dll" /pdbtype:sept + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "zlib___Win32_LIB_Release" +# PROP 
BASE Intermediate_Dir "zlib___Win32_LIB_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_LIB_Release" +# PROP Intermediate_Dir "Win32_LIB_Release" +# PROP Target_Dir "" +CPP=cl.exe +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT BASE CPP /YX /Yc /Yu +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /FD /c +# SUBTRACT CPP /YX /Yc /Yu +RSC=rc.exe +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LIB32=link.exe -lib +# ADD BASE LIB32 /nologo +# ADD LIB32 /nologo + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "zlib___Win32_LIB_Debug" +# PROP BASE Intermediate_Dir "zlib___Win32_LIB_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_LIB_Debug" +# PROP Intermediate_Dir "Win32_LIB_Debug" +# PROP Target_Dir "" +CPP=cl.exe +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX /Yc /Yu +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT CPP /YX /Yc /Yu +RSC=rc.exe +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LIB32=link.exe -lib +# ADD BASE LIB32 /nologo +# ADD LIB32 /nologo /out:"Win32_LIB_Debug\zlibd.lib" + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB ASM Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "zlib___Win32_LIB_ASM_Release" +# PROP BASE Intermediate_Dir "zlib___Win32_LIB_ASM_Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Win32_LIB_ASM_Release" +# PROP Intermediate_Dir "Win32_LIB_ASM_Release" +# PROP Target_Dir "" +CPP=cl.exe +# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" 
/FD /c +# SUBTRACT BASE CPP /YX /Yc /Yu +# ADD CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /D "ASMV" /D "ASMINF" /FD /c +# SUBTRACT CPP /YX /Yc /Yu +RSC=rc.exe +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LIB32=link.exe -lib +# ADD BASE LIB32 /nologo +# ADD LIB32 /nologo + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB ASM Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "zlib___Win32_LIB_ASM_Debug" +# PROP BASE Intermediate_Dir "zlib___Win32_LIB_ASM_Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Win32_LIB_ASM_Debug" +# PROP Intermediate_Dir "Win32_LIB_ASM_Debug" +# PROP Target_Dir "" +CPP=cl.exe +# ADD BASE CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /FD /GZ /c +# SUBTRACT BASE CPP /YX /Yc /Yu +# ADD CPP /nologo /MDd /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "ASMV" /D "ASMINF" /FD /GZ /c +# SUBTRACT CPP /YX /Yc /Yu +RSC=rc.exe +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LIB32=link.exe -lib +# ADD BASE LIB32 /nologo +# ADD LIB32 /nologo /out:"Win32_LIB_ASM_Debug\zlibd.lib" + +!ENDIF + +# Begin Target + +# Name "zlib - Win32 DLL Release" +# Name "zlib - Win32 DLL Debug" +# Name "zlib - Win32 DLL ASM Release" +# Name "zlib - Win32 DLL ASM Debug" +# Name "zlib - Win32 LIB Release" +# Name "zlib - Win32 LIB Debug" +# Name "zlib - Win32 LIB ASM Release" +# Name "zlib - Win32 LIB ASM Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=..\..\adler32.c +# End Source File +# Begin Source File + +SOURCE=..\..\compress.c +# End Source File +# Begin Source File + +SOURCE=..\..\crc32.c +# End Source File +# Begin Source File + +SOURCE=..\..\deflate.c +# End Source File +# Begin Source File + +SOURCE=..\..\gzio.c +# End 
Source File +# Begin Source File + +SOURCE=..\..\infback.c +# End Source File +# Begin Source File + +SOURCE=..\..\inffast.c +# End Source File +# Begin Source File + +SOURCE=..\..\inflate.c +# End Source File +# Begin Source File + +SOURCE=..\..\inftrees.c +# End Source File +# Begin Source File + +SOURCE=..\..\trees.c +# End Source File +# Begin Source File + +SOURCE=..\..\uncompr.c +# End Source File +# Begin Source File + +SOURCE=..\..\win32\zlib.def + +!IF "$(CFG)" == "zlib - Win32 DLL Release" + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL Debug" + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL ASM Release" + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL ASM Debug" + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB Release" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB Debug" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB ASM Release" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB ASM Debug" + +# PROP Exclude_From_Build 1 + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=..\..\zutil.c +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=..\..\crc32.h +# End Source File +# Begin Source File + +SOURCE=..\..\deflate.h +# End Source File +# Begin Source File + +SOURCE=..\..\inffast.h +# End Source File +# Begin Source File + +SOURCE=..\..\inffixed.h +# End Source File +# Begin Source File + +SOURCE=..\..\inflate.h +# End Source File +# Begin Source File + +SOURCE=..\..\inftrees.h +# End Source File +# Begin Source File + +SOURCE=..\..\trees.h +# End Source File +# Begin Source File + +SOURCE=..\..\zconf.h +# End Source File +# Begin Source File + +SOURCE=..\..\zlib.h +# End Source File +# Begin Source File + +SOURCE=..\..\zutil.h +# End Source File +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" +# Begin Source File + +SOURCE=..\..\win32\zlib1.rc +# End 
Source File +# End Group +# Begin Group "Assembler Files (Unsupported)" + +# PROP Default_Filter "asm;obj;c;cpp;cxx;h;hpp;hxx" +# Begin Source File + +SOURCE=..\..\contrib\masmx86\gvmat32.asm + +!IF "$(CFG)" == "zlib - Win32 DLL Release" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL Debug" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL ASM Release" + +# Begin Custom Build - Assembling... +IntDir=.\Win32_DLL_ASM_Release +InputPath=..\..\contrib\masmx86\gvmat32.asm +InputName=gvmat32 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + ml.exe /nologo /c /coff /Cx /Fo"$(IntDir)\$(InputName).obj" "$(InputPath)" + +# End Custom Build + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL ASM Debug" + +# Begin Custom Build - Assembling... +IntDir=.\Win32_DLL_ASM_Debug +InputPath=..\..\contrib\masmx86\gvmat32.asm +InputName=gvmat32 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + ml.exe /nologo /c /coff /Cx /Zi /Fo"$(IntDir)\$(InputName).obj" "$(InputPath)" + +# End Custom Build + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB Release" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB Debug" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB ASM Release" + +# Begin Custom Build - Assembling... +IntDir=.\Win32_LIB_ASM_Release +InputPath=..\..\contrib\masmx86\gvmat32.asm +InputName=gvmat32 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + ml.exe /nologo /c /coff /Cx /Fo"$(IntDir)\$(InputName).obj" "$(InputPath)" + +# End Custom Build + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB ASM Debug" + +# Begin Custom Build - Assembling... 
+IntDir=.\Win32_LIB_ASM_Debug +InputPath=..\..\contrib\masmx86\gvmat32.asm +InputName=gvmat32 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + ml.exe /nologo /c /coff /Cx /Zi /Fo"$(IntDir)\$(InputName).obj" "$(InputPath)" + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=..\..\contrib\masmx86\gvmat32c.c + +!IF "$(CFG)" == "zlib - Win32 DLL Release" + +# PROP Exclude_From_Build 1 +# ADD CPP /I "..\.." + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL Debug" + +# PROP Exclude_From_Build 1 +# ADD CPP /I "..\.." + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL ASM Release" + +# ADD CPP /I "..\.." + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL ASM Debug" + +# ADD CPP /I "..\.." + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB Release" + +# PROP Exclude_From_Build 1 +# ADD CPP /I "..\.." + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB Debug" + +# PROP Exclude_From_Build 1 +# ADD CPP /I "..\.." + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB ASM Release" + +# ADD CPP /I "..\.." + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB ASM Debug" + +# ADD CPP /I "..\.." + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=..\..\contrib\masmx86\inffas32.asm + +!IF "$(CFG)" == "zlib - Win32 DLL Release" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL Debug" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL ASM Release" + +# Begin Custom Build - Assembling... +IntDir=.\Win32_DLL_ASM_Release +InputPath=..\..\contrib\masmx86\inffas32.asm +InputName=inffas32 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + ml.exe /nologo /c /coff /Cx /Fo"$(IntDir)\$(InputName).obj" "$(InputPath)" + +# End Custom Build + +!ELSEIF "$(CFG)" == "zlib - Win32 DLL ASM Debug" + +# Begin Custom Build - Assembling... 
+IntDir=.\Win32_DLL_ASM_Debug +InputPath=..\..\contrib\masmx86\inffas32.asm +InputName=inffas32 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + ml.exe /nologo /c /coff /Cx /Zi /Fo"$(IntDir)\$(InputName).obj" "$(InputPath)" + +# End Custom Build + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB Release" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB Debug" + +# PROP Exclude_From_Build 1 + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB ASM Release" + +# Begin Custom Build - Assembling... +IntDir=.\Win32_LIB_ASM_Release +InputPath=..\..\contrib\masmx86\inffas32.asm +InputName=inffas32 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + ml.exe /nologo /c /coff /Cx /Fo"$(IntDir)\$(InputName).obj" "$(InputPath)" + +# End Custom Build + +!ELSEIF "$(CFG)" == "zlib - Win32 LIB ASM Debug" + +# Begin Custom Build - Assembling... +IntDir=.\Win32_LIB_ASM_Debug +InputPath=..\..\contrib\masmx86\inffas32.asm +InputName=inffas32 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + ml.exe /nologo /c /coff /Cx /Zi /Fo"$(IntDir)\$(InputName).obj" "$(InputPath)" + +# End Custom Build + +!ENDIF + +# End Source File +# End Group +# Begin Source File + +SOURCE=.\README.txt +# End Source File +# End Target +# End Project Added: external/zlib/projects/visualc6/zlib.dsw ============================================================================== --- (empty file) +++ external/zlib/projects/visualc6/zlib.dsw Tue Jan 3 07:42:59 2006 @@ -0,0 +1,59 @@ +Microsoft Developer Studio Workspace File, Format Version 6.00 +# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE! 
+ +############################################################################### + +Project: "example"=.\example.dsp - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ + Begin Project Dependency + Project_Dep_Name zlib + End Project Dependency +}}} + +############################################################################### + +Project: "minigzip"=.\minigzip.dsp - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ + Begin Project Dependency + Project_Dep_Name zlib + End Project Dependency +}}} + +############################################################################### + +Project: "zlib"=.\zlib.dsp - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ +}}} + +############################################################################### + +Global: + +Package=<5> +{{{ +}}} + +Package=<3> +{{{ +}}} + +############################################################################### + Added: external/zlib/qnx/package.qpg ============================================================================== --- (empty file) +++ external/zlib/qnx/package.qpg Tue Jan 3 07:42:59 2006 @@ -0,0 +1,141 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Library + + Medium + + 2.0 + + + + zlib + zlib + alain.bonnefoy at icbt.com + Public + public + www.gzip.org/zlib + + + Jean-Loup Gailly,Mark Adler + www.gzip.org/zlib + + zlib at gzip.org + + + A massively spiffy yet delicately unobtrusive compression library. + zlib is designed to be a free, general-purpose, legally unencumbered, lossless data compression library for use on virtually any computer hardware and operating system. 
+ http://www.gzip.org/zlib + + + + + 1.2.3 + Medium + Stable + + + + + + + No License + + + + Software Development/Libraries and Extensions/C Libraries + zlib,compression + qnx6 + qnx6 + None + Developer + + + + + + + + + + + + + + Install + Post + No + Ignore + + No + Optional + + + + + + + + + + + + + InstallOver + zlib + + + + + + + + + + + + + InstallOver + zlib-dev + + + + + + + + + Added: external/zlib/trees.c ============================================================================== --- (empty file) +++ external/zlib/trees.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,1219 @@ +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-2005 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). The actual code strings are + * reconstructed from the lengths in the inflate process, as described + * in the deflate specification. + * + * REFERENCES + * + * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". + * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. 
+ */ + +/* @(#) $Id$ */ + +/* #define GEN_TREES_H */ + +#include "deflate.h" + +#ifdef DEBUG +# include +#endif + +/* =========================================================================== + * Constants + */ + +#define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ + +#define END_BLOCK 256 +/* end of block literal code */ + +#define REP_3_6 16 +/* repeat previous bit length 3-6 times (2 bits of repeat count) */ + +#define REPZ_3_10 17 +/* repeat a zero length 3-10 times (3 bits of repeat count) */ + +#define REPZ_11_138 18 +/* repeat a zero length 11-138 times (7 bits of repeat count) */ + +local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ + = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; + +local const int extra_dbits[D_CODES] /* extra bits for each distance code */ + = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ + = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; + +local const uch bl_order[BL_CODES] + = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; +/* The lengths of the bit length codes are sent in order of decreasing + * probability, to avoid transmitting the lengths for unused bit length codes. + */ + +#define Buf_size (8 * 2*sizeof(char)) +/* Number of bits used within bi_buf. (bi_buf might be implemented on + * more than 16 bits on some systems.) + */ + +/* =========================================================================== + * Local data. These are initialized only once. + */ + +#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ + +#if defined(GEN_TREES_H) || !defined(STDC) +/* non ANSI compilers may not accept trees.h */ + +local ct_data static_ltree[L_CODES+2]; +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. 
However + * The codes 286 and 287 are needed to build a canonical tree (see _tr_init + * below). + */ + +local ct_data static_dtree[D_CODES]; +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + +uch _dist_code[DIST_CODE_LEN]; +/* Distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. + */ + +uch _length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +local int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +local int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +#else +# include "trees.h" +#endif /* GEN_TREES_H */ + +struct static_tree_desc_s { + const ct_data *static_tree; /* static tree or NULL */ + const intf *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ +}; + +local static_tree_desc static_l_desc = +{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; + +local static_tree_desc static_d_desc = +{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; + +local static_tree_desc static_bl_desc = +{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; + +/* =========================================================================== + * Local (static) routines in this file. 
+ */ + +local void tr_static_init OF((void)); +local void init_block OF((deflate_state *s)); +local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); +local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); +local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); +local void build_tree OF((deflate_state *s, tree_desc *desc)); +local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local int build_bl_tree OF((deflate_state *s)); +local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + int blcodes)); +local void compress_block OF((deflate_state *s, ct_data *ltree, + ct_data *dtree)); +local void set_data_type OF((deflate_state *s)); +local unsigned bi_reverse OF((unsigned value, int length)); +local void bi_windup OF((deflate_state *s)); +local void bi_flush OF((deflate_state *s)); +local void copy_block OF((deflate_state *s, charf *buf, unsigned len, + int header)); + +#ifdef GEN_TREES_H +local void gen_trees_header OF((void)); +#endif + +#ifndef DEBUG +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ + +#else /* DEBUG */ +# define send_code(s, c, tree) \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } +#endif + +/* =========================================================================== + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. + */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. 
+ */ +#ifdef DEBUG +local void send_bits OF((deflate_state *s, int value, int length)); + +local void send_bits(s, value, length) + deflate_state *s; + int value; /* value to send */ + int length; /* number of bits */ +{ + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (value << s->bi_valid); + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !DEBUG */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = value;\ + s->bi_buf |= (val << s->bi_valid);\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* DEBUG */ + + +/* the arguments must not have side effects */ + +/* =========================================================================== + * Initialize the various 'constant' tables. 
+ */ +local void tr_static_init() +{ +#if defined(GEN_TREES_H) || !defined(STDC) + static int static_init_done = 0; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + if (static_init_done) return; + + /* For some embedded targets, global variables are not initialized: */ + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1<<extra_lbits[code]); n++) { + _length_code[length++] = (uch)code; + } + } + Assert (length == 256, "tr_static_init: length != 256"); + /* Note that the length 255 (match length 258) can be represented + * in two different ways: code 284 + 5 bits or code 285, so we + * overwrite length_code[255] to use the best encoding: + */ + _length_code[length-1] = (uch)code; + + /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<<extra_dbits[code]); n++) { + _dist_code[dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: dist != 256"); + dist >>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + _dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + 
static_dtree[n].Code = bi_reverse((unsigned)n, 5); + } + static_init_done = 1; + +# ifdef GEN_TREES_H + gen_trees_header(); +# endif +#endif /* defined(GEN_TREES_H) || !defined(STDC) */ +} + +/* =========================================================================== + * Genererate the file trees.h describing the static trees. + */ +#ifdef GEN_TREES_H +# ifndef DEBUG +# include <stdio.h> +# endif + +# define SEPARATOR(i, last, width) \ + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ? ",\n" : ", ")) + +void gen_trees_header() +{ + FILE *header = fopen("trees.h", "w"); + int i; + + Assert (header != NULL, "Can't open trees.h"); + fprintf(header, + "/* header created automatically with -DGEN_TREES_H */\n\n"); + + fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); + for (i = 0; i < L_CODES+2; i++) { + fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, + static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); + } + + fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, + static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); + } + + fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n"); + for (i = 0; i < DIST_CODE_LEN; i++) { + fprintf(header, "%2u%s", _dist_code[i], + SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } + + fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { + fprintf(header, "%2u%s", _length_code[i], + SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); + } + + fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); + for (i = 0; i < LENGTH_CODES; i++) { + fprintf(header, "%1u%s", base_length[i], + SEPARATOR(i, LENGTH_CODES-1, 20)); + } + + fprintf(header, "local const int base_dist[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "%5u%s", base_dist[i], + SEPARATOR(i, D_CODES-1, 10)); + } + + fclose(header); +} +#endif /* GEN_TREES_H */ + +/* 
=========================================================================== + * Initialize the tree data structures for a new zlib stream. + */ +void _tr_init(s) + deflate_state *s; +{ + tr_static_init(); + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; + s->last_eob_len = 8; /* enough lookahead for inflate */ +#ifdef DEBUG + s->compressed_len = 0L; + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +/* =========================================================================== + * Initialize a new block. + */ +local void init_block(s) + deflate_state *s; +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->last_lit = s->matches = 0; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. 
+ */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +local void pqdownheap(s, tree, k) + deflate_state *s; + ct_data *tree; /* the tree to restore */ + int k; /* node to move down */ +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. 
+ */ +local void gen_bitlen(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ + + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (bits + xbits); + if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); + } + if (overflow == 0) return; + + Trace((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in 
increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if ((unsigned) tree[m].Len != (unsigned) bits) { + Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((long)bits - (long)tree[m].Len) + *(long)tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +local void gen_codes (tree, max_code, bl_count) + ct_data *tree; /* the tree to decorate */ + int max_code; /* largest code with non zero frequency */ + ushf *bl_count; /* number of codes at each bit length */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + ush code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + next_code[bits] = code = (code + bl_count[bits-1]) << 1; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. 
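+ */ + Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1, + "inconsistent bit counts"); + Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); + + for (n = 0; n <= max_code; n++) { + int len = tree[n].Len; + if (len == 0) continue; + /* Now reverse the bits */ + tree[n].Code = bi_reverse(next_code[len]++, len); + + Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", + n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1)); + } +} + +/* =========================================================================== + * Construct one Huffman tree and assigns the code bit strings and lengths. + * Update the total bit length for the current block. + * IN assertion: the field freq is set for all tree elements. + * OUT assertions: the fields len and code are set to the optimal bit length + * and corresponding code. The length opt_len is updated; static_len is + * also updated if stree is not null. The field max_code is set. + */ +local void build_tree(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? 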
+ s->depth[n] : s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. + */ +local void scan_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + 
max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. + */ +local void send_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to 
send. + */ +local int build_bl_tree(s) + deflate_state *s; +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*(max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 
+ */ +local void send_all_trees(s, lcodes, dcodes, blcodes) + deflate_state *s; + int lcodes, dcodes, blcodes; /* number of codes for each tree */ +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +void _tr_stored_block(s, buf, stored_len, eof) + deflate_state *s; + charf *buf; /* input block */ + ulg stored_len; /* length of input block */ + int eof; /* true if this is the last block for a file */ +{ + send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ +#ifdef DEBUG + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; +#endif + copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ +} + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. + * The current inflate code requires 9 bits of lookahead. 
If the + * last two codes for the previous block (real code plus EOB) were coded + * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode + * the last real code. In this case we send two empty static blocks instead + * of one. (There are no problems if the previous block is stored or fixed.) + * To simplify the code, we assume the worst case of last real code encoded + * on one bit only. + */ +void _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef DEBUG + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ +#endif + bi_flush(s); + /* Of the 10 bits for the empty block, we have already sent + * (10 - bi_valid) bits. The lookahead for the last real code (before + * the EOB of the previous block) was thus at least one plus the length + * of the EOB plus what we have just sent of the empty static block. + */ + if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef DEBUG + s->compressed_len += 10L; +#endif + bi_flush(s); + } + s->last_eob_len = 7; +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and output the encoded block to the zip file. 
+ */ +void _tr_flush_block(s, buf, stored_len, eof) + deflate_state *s; + charf *buf; /* input block, or NULL if too old */ + ulg stored_len; /* length of input block */ + int eof; /* true if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is binary or text */ + if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN) + set_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. Compute the block lengths in bytes. */ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->last_lit)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. 
+ * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + _tr_stored_block(s, buf, stored_len, eof); + +#ifdef FORCE_STATIC + } else if (static_lenb >= 0) { /* force static trees */ +#else + } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { +#endif + send_bits(s, (STATIC_TREES<<1)+eof, 3); + compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); +#ifdef DEBUG + s->compressed_len += 3 + s->static_len; +#endif + } else { + send_bits(s, (DYN_TREES<<1)+eof, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); +#ifdef DEBUG + s->compressed_len += 3 + s->opt_len; +#endif + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. + */ + init_block(s); + + if (eof) { + bi_windup(s); +#ifdef DEBUG + s->compressed_len += 7; /* align on byte boundary */ +#endif + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*eof)); +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. 
+ */ +int _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + +#ifdef TRUNCATE_BLOCK + /* Try to guess if it is profitable to stop the current block here */ + if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } +#endif + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. 
+ */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(s, ltree, dtree) + deflate_state *s; + ct_data *ltree; /* literal tree */ + ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, + "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); + s->last_eob_len = ltree[END_BLOCK].Len; +} + +/* =========================================================================== + * Set the data type to BINARY or TEXT, using a crude approximation: + * set it to Z_TEXT if all symbols are either printable characters (33 to 255) + * or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise. 
+ * IN assertion: the fields Freq of dyn_ltree are set. + */ +local void set_data_type(s) + deflate_state *s; +{ + int n; + + for (n = 0; n < 9; n++) + if (s->dyn_ltree[n].Freq != 0) + break; + if (n == 9) + for (n = 14; n < 32; n++) + if (s->dyn_ltree[n].Freq != 0) + break; + s->strm->data_type = (n == 32) ? Z_TEXT : Z_BINARY; +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +local void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef DEBUG + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} + +/* =========================================================================== + * Copy a stored block, storing first the length and its + * one's complement if requested. 
+ */ +local void copy_block(s, buf, len, header) + deflate_state *s; + charf *buf; /* the input data */ + unsigned len; /* its length */ + int header; /* true if block header must be written */ +{ + bi_windup(s); /* align on byte boundary */ + s->last_eob_len = 8; /* enough lookahead for inflate */ + + if (header) { + put_short(s, (ush)len); + put_short(s, (ush)~len); +#ifdef DEBUG + s->bits_sent += 2*16; +#endif + } +#ifdef DEBUG + s->bits_sent += (ulg)len<<3; +#endif + while (len--) { + put_byte(s, *buf++); + } +} Added: external/zlib/trees.h ============================================================================== --- (empty file) +++ external/zlib/trees.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,128 @@ +/* header created automatically with -DGEN_TREES_H */ + +local const ct_data static_ltree[L_CODES+2] = { +{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, +{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, +{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, +{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, +{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, +{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, +{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, +{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, +{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, +{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, +{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, +{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, +{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, +{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, +{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, +{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, +{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, 
{{193},{ 8}}, {{ 33},{ 8}}, +{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, +{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, +{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, +{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, +{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, +{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, +{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, +{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, +{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, +{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, +{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, +{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, +{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, +{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, +{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, +{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, +{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, +{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, +{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, +{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, +{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, +{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, +{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, +{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, +{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, +{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, +{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, +{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, 
{{ 15},{ 9}}, +{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, +{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, +{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, +{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, +{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, +{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, +{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, +{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, +{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, +{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, +{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, +{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, +{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} +}; + +local const ct_data static_dtree[D_CODES] = { +{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, +{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, +{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, +{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, +{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, +{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} +}; + +const uch _dist_code[DIST_CODE_LEN] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, +10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, +18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +}; + +const uch _length_code[MAX_MATCH-MIN_MATCH+1]= { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, +13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, +17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, +19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, +21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, +22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, +23, 23, 23, 23, 23, 23, 
23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 +}; + +local const int base_length[LENGTH_CODES] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, +64, 80, 96, 112, 128, 160, 192, 224, 0 +}; + +local const int base_dist[D_CODES] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 +}; + Added: external/zlib/uncompr.c ============================================================================== --- (empty file) +++ external/zlib/uncompr.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,61 @@ +/* uncompr.c -- decompress a memory buffer + * Copyright (C) 1995-2003 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +/* =========================================================================== + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the compressed buffer. 
+ This function can be used to decompress a whole file at once if the + input file is mmap'ed. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. +*/ +int ZEXPORT uncompress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + z_stream stream; + int err; + + stream.next_in = (Bytef*)source; + stream.avail_in = (uInt)sourceLen; + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; + + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + + err = inflateInit(&stream); + if (err != Z_OK) return err; + + err = inflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + inflateEnd(&stream); + if (err == Z_NEED_DICT || (err == Z_BUF_ERROR && stream.avail_in == 0)) + return Z_DATA_ERROR; + return err; + } + *destLen = stream.total_out; + + err = inflateEnd(&stream); + return err; +} Added: external/zlib/win32/DLL_FAQ.txt ============================================================================== --- (empty file) +++ external/zlib/win32/DLL_FAQ.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,397 @@ + + Frequently Asked Questions about ZLIB1.DLL + + +This document describes the design, the rationale, and the usage +of the official DLL build of zlib, named ZLIB1.DLL. If you have +general questions about zlib, you should see the file "FAQ" found +in the zlib distribution, or at the following location: + http://www.gzip.org/zlib/zlib_faq.html + + + 1. What is ZLIB1.DLL, and how can I get it? + + - ZLIB1.DLL is the official build of zlib as a DLL. + (Please remark the character '1' in the name.) 
+ + Pointers to a precompiled ZLIB1.DLL can be found in the zlib + web site at: + http://www.zlib.org/ + + Applications that link to ZLIB1.DLL can rely on the following + specification: + + * The exported symbols are exclusively defined in the source + files "zlib.h" and "zlib.def", found in an official zlib + source distribution. + * The symbols are exported by name, not by ordinal. + * The exported names are undecorated. + * The calling convention of functions is "C" (CDECL). + * The ZLIB1.DLL binary is linked to MSVCRT.DLL. + + The archive in which ZLIB1.DLL is bundled contains compiled + test programs that must run with a valid build of ZLIB1.DLL. + It is recommended to download the prebuilt DLL from the zlib + web site, instead of building it yourself, to avoid potential + incompatibilities that could be introduced by your compiler + and build settings. If you do build the DLL yourself, please + make sure that it complies with all the above requirements, + and it runs with the precompiled test programs, bundled with + the original ZLIB1.DLL distribution. + + If, for any reason, you need to build an incompatible DLL, + please use a different file name. + + + 2. Why did you change the name of the DLL to ZLIB1.DLL? + What happened to the old ZLIB.DLL? + + - The old ZLIB.DLL, built from zlib-1.1.4 or earlier, required + compilation settings that were incompatible to those used by + a static build. The DLL settings were supposed to be enabled + by defining the macro ZLIB_DLL, before including "zlib.h". + Incorrect handling of this macro was silently accepted at + build time, resulting in two major problems: + + * ZLIB_DLL was missing from the old makefile. When building + the DLL, not all people added it to the build options. In + consequence, incompatible incarnations of ZLIB.DLL started + to circulate around the net. 
+ + * When switching from using the static library to using the + DLL, applications had to define the ZLIB_DLL macro and + to recompile all the sources that contained calls to zlib + functions. Failure to do so resulted in creating binaries + that were unable to run with the official ZLIB.DLL build. + + The only possible solution that we could foresee was to make + a binary-incompatible change in the DLL interface, in order to + remove the dependency on the ZLIB_DLL macro, and to release + the new DLL under a different name. + + We chose the name ZLIB1.DLL, where '1' indicates the major + zlib version number. We hope that we will not have to break + the binary compatibility again, at least not as long as the + zlib-1.x series will last. + + There is still a ZLIB_DLL macro, that can trigger a more + efficient build and use of the DLL, but compatibility no + longer depends on it. + + + 3. Can I build ZLIB.DLL from the new zlib sources, and replace + an old ZLIB.DLL, that was built from zlib-1.1.4 or earlier? + + - In principle, you can do it by assigning calling convention + keywords to the macros ZEXPORT and ZEXPORTVA. In practice, + it depends on what you mean by "an old ZLIB.DLL", because the + old DLL exists in several mutually-incompatible versions. + You have to find out first what kind of calling convention is + being used in your particular ZLIB.DLL build, and to use the + same one in the new build. If you don't know what this is all + about, you might be better off if you would just leave the old + DLL intact. + + + 4. Can I compile my application using the new zlib interface, and + link it to an old ZLIB.DLL, that was built from zlib-1.1.4 or + earlier? + + - The official answer is "no"; the real answer depends again on + what kind of ZLIB.DLL you have. Even if you are lucky, this + course of action is unreliable. 
+ + If you rebuild your application and you intend to use a newer + version of zlib (post- 1.1.4), it is strongly recommended to + link it to the new ZLIB1.DLL. + + + 5. Why are the zlib symbols exported by name, and not by ordinal? + + - Although exporting symbols by ordinal is a little faster, it + is risky. Any single glitch in the maintenance or use of the + DEF file that contains the ordinals can result in incompatible + builds and frustrating crashes. Simply put, the benefits of + exporting symbols by ordinal do not justify the risks. + + Technically, it should be possible to maintain ordinals in + the DEF file, and still export the symbols by name. Ordinals + exist in every DLL, and even if the dynamic linking performed + at the DLL startup is searching for names, ordinals serve as + hints, for a faster name lookup. However, if the DEF file + contains ordinals, the Microsoft linker automatically builds + an implib that will cause the executables linked to it to use + those ordinals, and not the names. It is interesting to + notice that the GNU linker for Win32 does not suffer from this + problem. + + It is possible to avoid the DEF file if the exported symbols + are accompanied by a "__declspec(dllexport)" attribute in the + source files. You can do this in zlib by predefining the + ZLIB_DLL macro. + + + 6. I see that the ZLIB1.DLL functions use the "C" (CDECL) calling + convention. Why not use the STDCALL convention? + STDCALL is the standard convention in Win32, and I need it in + my Visual Basic project! + + (For readability, we use CDECL to refer to the convention + triggered by the "__cdecl" keyword, STDCALL to refer to + the convention triggered by "__stdcall", and FASTCALL to + refer to the convention triggered by "__fastcall".) + + - Most of the native Windows API functions (without varargs) use + indeed the WINAPI convention (which translates to STDCALL in + Win32), but the standard C functions use CDECL. 
If a user + application is intrinsically tied to the Windows API (e.g. + it calls native Windows API functions such as CreateFile()), + sometimes it makes sense to decorate its own functions with + WINAPI. But if ANSI C or POSIX portability is a goal (e.g. + it calls standard C functions such as fopen()), it is not a + sound decision to request the inclusion of <windows.h>, or to + use non-ANSI constructs, for the sole purpose of making the user + functions STDCALL-able. + + The functionality offered by zlib is not in the category of + "Windows functionality", but is more like "C functionality". + + Technically, STDCALL is not bad; in fact, it is slightly + faster than CDECL, and it works with variable-argument + functions, just like CDECL. It is unfortunate that, in spite + of using STDCALL in the Windows API, it is not the default + convention used by the C compilers that run under Windows. + The roots of the problem reside deep inside the unsafety of + the K&R-style function prototypes, where the argument types + are not specified; but that is another story for another day. + + The remaining fact is that CDECL is the default convention. + Even if an explicit convention is hard-coded into the function + prototypes inside C headers, problems may appear. The + necessity to expose the convention in users' callbacks is one + of these problems. + + The calling convention issues are also important when using + zlib in other programming languages. Some of them, like Ada + (GNAT) and Fortran (GNU G77), have C bindings implemented + initially on Unix, and relying on the C calling convention. + On the other hand, the pre- .NET versions of Microsoft Visual + Basic require STDCALL, while Borland Delphi prefers, although + it does not require, FASTCALL. + + In fairness to all possible uses of zlib outside the C + programming language, we choose the default "C" convention. + Anyone interested in different bindings or conventions is + encouraged to maintain specialized projects. 
The "contrib/" + directory from the zlib distribution already holds a couple + of foreign bindings, such as Ada, C++, and Delphi. + + + 7. I need a DLL for my Visual Basic project. What can I do? + + - Define the ZLIB_WINAPI macro before including "zlib.h", when + building both the DLL and the user application (except that + you don't need to define anything when using the DLL in Visual + Basic). The ZLIB_WINAPI macro will switch on the WINAPI + (STDCALL) convention. The name of this DLL must be different + than the official ZLIB1.DLL. + + Gilles Vollant has contributed a build named ZLIBWAPI.DLL, + with the ZLIB_WINAPI macro turned on, and with the minizip + functionality built in. For more information, please read + the notes inside "contrib/vstudio/readme.txt", found in the + zlib distribution. + + + 8. I need to use zlib in my Microsoft .NET project. What can I + do? + + - Henrik Ravn has contributed a .NET wrapper around zlib. Look + into contrib/dotzlib/, inside the zlib distribution. + + + 9. If my application uses ZLIB1.DLL, should I link it to + MSVCRT.DLL? Why? + + - It is not required, but it is recommended to link your + application to MSVCRT.DLL, if it uses ZLIB1.DLL. + + The executables (.EXE, .DLL, etc.) that are involved in the + same process and are using the C run-time library (i.e. they + are calling standard C functions), must link to the same + library. There are several libraries in the Win32 system: + CRTDLL.DLL, MSVCRT.DLL, the static C libraries, etc. + Since ZLIB1.DLL is linked to MSVCRT.DLL, the executables that + depend on it should also be linked to MSVCRT.DLL. + + +10. Why are you saying that ZLIB1.DLL and my application should + be linked to the same C run-time (CRT) library? I linked my + application and my DLLs to different C libraries (e.g. my + application to a static library, and my DLLs to MSVCRT.DLL), + and everything works fine. 
+ + - If a user library invokes only pure Win32 API (accessible via + <windows.h> and the related headers), its DLL build will work + in any context. But if this library invokes standard C API, + things get more complicated. + + There is a single Win32 library in a Win32 system. Every + function in this library resides in a single DLL module, that + is safe to call from anywhere. On the other hand, there are + multiple versions of the C library, and each of them has its + own separate internal state. Standalone executables and user + DLLs that call standard C functions must link to a C run-time + (CRT) library, be it static or shared (DLL). Intermixing + occurs when an executable (not necessarily standalone) and a + DLL are linked to different CRTs, and both are running in the + same process. + + Intermixing multiple CRTs is possible, as long as their + internal states are kept intact. The Microsoft Knowledge Base + articles KB94248 "HOWTO: Use the C Run-Time" and KB140584 + "HOWTO: Link with the Correct C Run-Time (CRT) Library" + mention the potential problems raised by intermixing. + + If intermixing works for you, it's because your application + and DLLs are avoiding the corruption of each of the CRTs' + internal states, maybe by careful design, or maybe by fortune. + + Also note that linking ZLIB1.DLL to non-Microsoft CRTs, such + as those provided by Borland, raises similar problems. + + +11. Why are you linking ZLIB1.DLL to MSVCRT.DLL? + + - MSVCRT.DLL exists on every Windows 95 with a new service pack + installed, or with Microsoft Internet Explorer 4 or later, and + on all other Windows 4.x or later (Windows 98, Windows NT 4, + or later). It is freely distributable; if not present in the + system, it can be downloaded from Microsoft or from other + software providers for free. + + The fact that MSVCRT.DLL does not exist on a virgin Windows 95 + is not so problematic. 
Windows 95 is scarcely found nowadays, + Microsoft ended its support a long time ago, and many recent + applications from various vendors, including Microsoft, do not + even run on it. Furthermore, no serious user should run + Windows 95 without a proper update installed. + + +12. Why are you not linking ZLIB1.DLL to + <<my favorite C run-time library>> ? + + - We considered and abandoned the following alternatives: + + * Linking ZLIB1.DLL to a static C library (LIBC.LIB, or + LIBCMT.LIB) is not a good option. People are using the DLL + mainly to save disk space. If you are linking your program + to a static C library, you may as well consider linking zlib + in statically, too. + + * Linking ZLIB1.DLL to CRTDLL.DLL looks appealing, because + CRTDLL.DLL is present on every Win32 installation. + Unfortunately, it has a series of problems: it does not + work properly with Microsoft's C++ libraries, it does not + provide support for 64-bit file offsets, (and so on...), + and Microsoft discontinued its support a long time ago. + + * Linking ZLIB1.DLL to MSVCR70.DLL or MSVCR71.DLL, supplied + with the Microsoft .NET platform, and Visual C++ 7.0/7.1, + raises problems related to the status of ZLIB1.DLL as a + system component. According to the Microsoft Knowledge Base + article KB326922 "INFO: Redistribution of the Shared C + Runtime Component in Visual C++ .NET", MSVCR70.DLL and + MSVCR71.DLL are not supposed to function as system DLLs, + because they may clash with MSVCRT.DLL. Instead, the + application's installer is supposed to put these DLLs + (if needed) in the application's private directory. + If ZLIB1.DLL depends on a non-system runtime, it cannot + function as a redistributable system component. + + * Linking ZLIB1.DLL to non-Microsoft runtimes, such as + Borland's, or Cygwin's, raises problems related to the + reliable presence of these runtimes on Win32 systems. 
+ It's easier to leave the DLL build of zlib up to the people + who distribute these runtimes, and who may proceed as + explained in the answer to Question 14. + + +13. If ZLIB1.DLL cannot be linked to MSVCR70.DLL or MSVCR71.DLL, + how can I build/use ZLIB1.DLL in Microsoft Visual C++ 7.0 + (Visual Studio .NET) or newer? + + - Due to the problems explained in the Microsoft Knowledge Base + article KB326922 (see the previous answer), the C runtime that + comes with the VC7 environment is no longer considered a + system component. That is, it should not be assumed that this + runtime exists, or may be installed in a system directory. + Since ZLIB1.DLL is supposed to be a system component, it may + not depend on a non-system component. + + In order to link ZLIB1.DLL and your application to MSVCRT.DLL + in VC7, you need the library of Visual C++ 6.0 or older. If + you don't have this library at hand, it's probably best not to + use ZLIB1.DLL. + + We are hoping that, in the future, Microsoft will provide a + way to build applications linked to a proper system runtime, + from the Visual C++ environment. Until then, you have a + couple of alternatives, such as linking zlib in statically. + If your application requires dynamic linking, you may proceed + as explained in the answer to Question 14. + + +14. I need to link my own DLL build to a CRT different than + MSVCRT.DLL. What can I do? + + - Feel free to rebuild the DLL from the zlib sources, and link + it the way you want. You should, however, clearly state that + your build is unofficial. You should give it a different file + name, and/or install it in a private directory that can be + accessed by your application only, and is not visible to the + others (e.g. it's not in the SYSTEM or the SYSTEM32 directory, + and it's not in the PATH). Otherwise, your build may clash + with applications that link to the official build. 
+ + For example, in Cygwin, zlib is linked to the Cygwin runtime + CYGWIN1.DLL, and it is distributed under the name CYGZ.DLL. + + +15. May I include additional pieces of code that I find useful, + link them in ZLIB1.DLL, and export them? + + - No. A legitimate build of ZLIB1.DLL must not include code + that does not originate from the official zlib source code. + But you can make your own private DLL build, under a different + file name, as suggested in the previous answer. + + For example, zlib is a part of the VCL library, distributed + with Borland Delphi and C++ Builder. The DLL build of VCL + is a redistributable file, named VCLxx.DLL. + + +16. May I remove some functionality out of ZLIB1.DLL, by enabling + macros like NO_GZCOMPRESS or NO_GZIP at compile time? + + - No. A legitimate build of ZLIB1.DLL must provide the complete + zlib functionality, as implemented in the official zlib source + code. But you can make your own private DLL build, under a + different file name, as suggested in the previous answer. + + +17. I made my own ZLIB1.DLL build. Can I test it for compliance? + + - We prefer that you download the official DLL from the zlib + web site. If you need something peculiar from this DLL, you + can send your suggestion to the zlib mailing list. + + However, in case you do rebuild the DLL yourself, you can run + it with the test programs found in the DLL distribution. + Running these test programs is not a guarantee of compliance, + but a failure can imply a detected problem. 
+ +** + +This document is written and maintained by +Cosmin Truta Added: external/zlib/win32/Makefile.bor ============================================================================== --- (empty file) +++ external/zlib/win32/Makefile.bor Tue Jan 3 07:42:59 2006 @@ -0,0 +1,107 @@ +# Makefile for zlib +# Borland C++ for Win32 +# +# Updated for zlib 1.2.x by Cosmin Truta, 11-Mar-2003 +# Last updated: 28-Aug-2003 +# +# Usage: +# make -f win32/Makefile.bor +# make -f win32/Makefile.bor LOCAL_ZLIB=-DASMV OBJA=match.obj OBJPA=+match.obj + +# ------------ Borland C++ ------------ + +# Optional nonstandard preprocessor flags (e.g. -DMAX_MEM_LEVEL=7) +# should be added to the environment via "set LOCAL_ZLIB=-DFOO" or +# added to the declaration of LOC here: +LOC = $(LOCAL_ZLIB) + +CC = bcc32 +AS = bcc32 +LD = bcc32 +AR = tlib +CFLAGS = -a -d -k- -O2 $(LOC) +ASFLAGS = $(LOC) +LDFLAGS = $(LOC) + + +# variables +ZLIB_LIB = zlib.lib + +OBJ1 = adler32.obj compress.obj crc32.obj deflate.obj gzio.obj infback.obj +OBJ2 = inffast.obj inflate.obj inftrees.obj trees.obj uncompr.obj zutil.obj +#OBJA = +OBJP1 = +adler32.obj+compress.obj+crc32.obj+deflate.obj+gzio.obj+infback.obj +OBJP2 = +inffast.obj+inflate.obj+inftrees.obj+trees.obj+uncompr.obj+zutil.obj +#OBJPA= + + +# targets +all: $(ZLIB_LIB) example.exe minigzip.exe + +.c.obj: + $(CC) -c $(CFLAGS) $< + +.asm.obj: + $(AS) -c $(ASFLAGS) $< + +adler32.obj: adler32.c zlib.h zconf.h + +compress.obj: compress.c zlib.h zconf.h + +crc32.obj: crc32.c zlib.h zconf.h crc32.h + +deflate.obj: deflate.c deflate.h zutil.h zlib.h zconf.h + +gzio.obj: gzio.c zutil.h zlib.h zconf.h + +infback.obj: infback.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inffast.obj: inffast.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h + +inflate.obj: inflate.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inftrees.obj: inftrees.c zutil.h zlib.h zconf.h inftrees.h + +trees.obj: trees.c zutil.h zlib.h 
zconf.h deflate.h trees.h + +uncompr.obj: uncompr.c zlib.h zconf.h + +zutil.obj: zutil.c zutil.h zlib.h zconf.h + +example.obj: example.c zlib.h zconf.h + +minigzip.obj: minigzip.c zlib.h zconf.h + + +# For the sake of the old Borland make, +# the command line is cut to fit in the MS-DOS 128 byte limit: +$(ZLIB_LIB): $(OBJ1) $(OBJ2) $(OBJA) + -del $(ZLIB_LIB) + $(AR) $(ZLIB_LIB) $(OBJP1) + $(AR) $(ZLIB_LIB) $(OBJP2) + $(AR) $(ZLIB_LIB) $(OBJPA) + + +# testing +test: example.exe minigzip.exe + example + echo hello world | minigzip | minigzip -d + +example.exe: example.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) example.obj $(ZLIB_LIB) + +minigzip.exe: minigzip.obj $(ZLIB_LIB) + $(LD) $(LDFLAGS) minigzip.obj $(ZLIB_LIB) + + +# cleanup +clean: + -del *.obj + -del *.lib + -del *.exe + -del *.tds + -del zlib.bak + -del foo.gz Added: external/zlib/win32/Makefile.emx ============================================================================== --- (empty file) +++ external/zlib/win32/Makefile.emx Tue Jan 3 07:42:59 2006 @@ -0,0 +1,69 @@ +# Makefile for zlib. Modified for emx/rsxnt by Chr. Spieler, 6/16/98. +# Copyright (C) 1995-1998 Jean-loup Gailly. +# For conditions of distribution and use, see copyright notice in zlib.h + +# To compile, or to compile and test, type: +# +# make -fmakefile.emx; make test -fmakefile.emx +# + +CC=gcc -Zwin32 + +#CFLAGS=-MMD -O +#CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7 +#CFLAGS=-MMD -g -DDEBUG +CFLAGS=-MMD -O3 $(BUTT) -Wall -Wwrite-strings -Wpointer-arith -Wconversion \ + -Wstrict-prototypes -Wmissing-prototypes + +# If cp.exe is available, replace "copy /Y" with "cp -fp" . +CP=copy /Y +# If gnu install.exe is available, replace $(CP) with ginstall. +INSTALL=$(CP) +# The default value of RM is "rm -f." If "rm.exe" is found, comment out: +RM=del +LDLIBS=-L. 
-lzlib +LD=$(CC) -s -o +LDSHARED=$(CC) + +INCL=zlib.h zconf.h +LIBS=zlib.a + +AR=ar rcs + +prefix=/usr/local +exec_prefix = $(prefix) + +OBJS = adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \ + zutil.o inflate.o infback.o inftrees.o inffast.o + +TEST_OBJS = example.o minigzip.o + +all: example.exe minigzip.exe + +test: all + ./example + echo hello world | .\minigzip | .\minigzip -d + +%.o : %.c + $(CC) $(CFLAGS) -c $< -o $@ + +zlib.a: $(OBJS) + $(AR) $@ $(OBJS) + +%.exe : %.o $(LIBS) + $(LD) $@ $< $(LDLIBS) + + +.PHONY : clean + +clean: + $(RM) *.d + $(RM) *.o + $(RM) *.exe + $(RM) zlib.a + $(RM) foo.gz + +DEPS := $(wildcard *.d) +ifneq ($(DEPS),) +include $(DEPS) +endif Added: external/zlib/win32/Makefile.gcc ============================================================================== --- (empty file) +++ external/zlib/win32/Makefile.gcc Tue Jan 3 07:42:59 2006 @@ -0,0 +1,141 @@ +# Makefile for zlib, derived from Makefile.dj2. +# Modified for mingw32 by C. Spieler, 6/16/98. +# Updated for zlib 1.2.x by Christian Spieler and Cosmin Truta, Mar-2003. +# Last updated: 1-Aug-2003. +# Tested under Cygwin and MinGW. + +# Copyright (C) 1995-2003 Jean-loup Gailly. +# For conditions of distribution and use, see copyright notice in zlib.h + +# To compile, or to compile and test, type: +# +# make -fmakefile.gcc; make test testdll -fmakefile.gcc +# +# To use the asm code, type: +# cp contrib/asm?86/match.S ./match.S +# make LOC=-DASMV OBJA=match.o -fmakefile.gcc +# +# To install libz.a, zconf.h and zlib.h in the system directories, type: +# +# make install -fmakefile.gcc + +# Note: +# If the platform is *not* MinGW (e.g. it is Cygwin or UWIN), +# the DLL name should be changed from "zlib1.dll". 
+ +STATICLIB = libz.a +SHAREDLIB = zlib1.dll +IMPLIB = libzdll.a + +#LOC = -DASMV +#LOC = -DDEBUG -g + +CC = gcc +CFLAGS = $(LOC) -O3 -Wall + +AS = $(CC) +ASFLAGS = $(LOC) -Wall + +LD = $(CC) +LDFLAGS = $(LOC) -s + +AR = ar +ARFLAGS = rcs + +RC = windres +RCFLAGS = --define GCC_WINDRES + +CP = cp -fp +# If GNU install is available, replace $(CP) with install. +INSTALL = $(CP) +RM = rm -f + +prefix = /usr/local +exec_prefix = $(prefix) + +OBJS = adler32.o compress.o crc32.o deflate.o gzio.o infback.o \ + inffast.o inflate.o inftrees.o trees.o uncompr.o zutil.o +OBJA = + +all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) example minigzip example_d minigzip_d + +test: example minigzip + ./example + echo hello world | ./minigzip | ./minigzip -d + +testdll: example_d minigzip_d + ./example_d + echo hello world | ./minigzip_d | ./minigzip_d -d + +.c.o: + $(CC) $(CFLAGS) -c -o $@ $< + +.S.o: + $(AS) $(ASFLAGS) -c -o $@ $< + +$(STATICLIB): $(OBJS) $(OBJA) + $(AR) $(ARFLAGS) $@ $(OBJS) $(OBJA) + +$(IMPLIB): $(SHAREDLIB) + +$(SHAREDLIB): win32/zlib.def $(OBJS) $(OBJA) zlibrc.o + dllwrap --driver-name $(CC) --def win32/zlib.def \ + --implib $(IMPLIB) -o $@ $(OBJS) $(OBJA) zlibrc.o + strip $@ + +example: example.o $(STATICLIB) + $(LD) $(LDFLAGS) -o $@ example.o $(STATICLIB) + +minigzip: minigzip.o $(STATICLIB) + $(LD) $(LDFLAGS) -o $@ minigzip.o $(STATICLIB) + +example_d: example.o $(IMPLIB) + $(LD) $(LDFLAGS) -o $@ example.o $(IMPLIB) + +minigzip_d: minigzip.o $(IMPLIB) + $(LD) $(LDFLAGS) -o $@ minigzip.o $(IMPLIB) + +zlibrc.o: win32/zlib1.rc + $(RC) $(RCFLAGS) -o $@ win32/zlib1.rc + + +# INCLUDE_PATH and LIBRARY_PATH must be set. 
+ +.PHONY: install uninstall clean + +install: zlib.h zconf.h $(LIB) + -@if not exist $(INCLUDE_PATH)/nul mkdir $(INCLUDE_PATH) + -@if not exist $(LIBRARY_PATH)/nul mkdir $(LIBRARY_PATH) + -$(INSTALL) zlib.h $(INCLUDE_PATH) + -$(INSTALL) zconf.h $(INCLUDE_PATH) + -$(INSTALL) $(STATICLIB) $(LIBRARY_PATH) + -$(INSTALL) $(IMPLIB) $(LIBRARY_PATH) + +uninstall: + -$(RM) $(INCLUDE_PATH)/zlib.h + -$(RM) $(INCLUDE_PATH)/zconf.h + -$(RM) $(LIBRARY_PATH)/$(STATICLIB) + -$(RM) $(LIBRARY_PATH)/$(IMPLIB) + +clean: + -$(RM) $(STATICLIB) + -$(RM) $(SHAREDLIB) + -$(RM) $(IMPLIB) + -$(RM) *.o + -$(RM) *.exe + -$(RM) foo.gz + +adler32.o: zlib.h zconf.h +compress.o: zlib.h zconf.h +crc32.o: crc32.h zlib.h zconf.h +deflate.o: deflate.h zutil.h zlib.h zconf.h +example.o: zlib.h zconf.h +gzio.o: zutil.h zlib.h zconf.h +inffast.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +inflate.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +infback.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h +inftrees.o: zutil.h zlib.h zconf.h inftrees.h +minigzip.o: zlib.h zconf.h +trees.o: deflate.h zutil.h zlib.h zconf.h trees.h +uncompr.o: zlib.h zconf.h +zutil.o: zutil.h zlib.h zconf.h Added: external/zlib/win32/Makefile.msc ============================================================================== --- (empty file) +++ external/zlib/win32/Makefile.msc Tue Jan 3 07:42:59 2006 @@ -0,0 +1,126 @@ +# Makefile for zlib -- Microsoft (Visual) C +# +# Authors: +# Cosmin Truta, 11-Mar-2003 +# Christian Spieler, 19-Mar-2003 +# +# Last updated: +# Cosmin Truta, 27-Aug-2003 +# +# Usage: +# nmake -f win32/Makefile.msc (standard build) +# nmake -f win32/Makefile.msc LOC=-DFOO (nonstandard build) +# nmake -f win32/Makefile.msc LOC=-DASMV OBJA=match.obj (use ASM code) + + +# optional build flags +LOC = + + +# variables +STATICLIB = zlib.lib +SHAREDLIB = zlib1.dll +IMPLIB = zdll.lib + +CC = cl +AS = ml +LD = link +AR = lib +RC = rc +CFLAGS = -nologo -MD -O2 $(LOC) +ASFLAGS = -coff 
+LDFLAGS = -nologo -release +ARFLAGS = -nologo +RCFLAGS = /dWIN32 /r + +OBJS = adler32.obj compress.obj crc32.obj deflate.obj gzio.obj infback.obj \ + inffast.obj inflate.obj inftrees.obj trees.obj uncompr.obj zutil.obj +OBJA = + + +# targets +all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \ + example.exe minigzip.exe example_d.exe minigzip_d.exe + +$(STATICLIB): $(OBJS) $(OBJA) + $(AR) $(ARFLAGS) -out:$@ $(OBJS) $(OBJA) + +$(IMPLIB): $(SHAREDLIB) + +$(SHAREDLIB): win32/zlib.def $(OBJS) $(OBJA) zlib1.res + $(LD) $(LDFLAGS) -def:win32/zlib.def -dll -implib:$(IMPLIB) \ + -out:$@ $(OBJS) $(OBJA) zlib1.res + +example.exe: example.obj $(STATICLIB) + $(LD) $(LDFLAGS) example.obj $(STATICLIB) + +minigzip.exe: minigzip.obj $(STATICLIB) + $(LD) $(LDFLAGS) minigzip.obj $(STATICLIB) + +example_d.exe: example.obj $(IMPLIB) + $(LD) $(LDFLAGS) -out:$@ example.obj $(IMPLIB) + +minigzip_d.exe: minigzip.obj $(IMPLIB) + $(LD) $(LDFLAGS) -out:$@ minigzip.obj $(IMPLIB) + +.c.obj: + $(CC) -c $(CFLAGS) $< + +.asm.obj: + $(AS) -c $(ASFLAGS) $< + +adler32.obj: adler32.c zlib.h zconf.h + +compress.obj: compress.c zlib.h zconf.h + +crc32.obj: crc32.c zlib.h zconf.h crc32.h + +deflate.obj: deflate.c deflate.h zutil.h zlib.h zconf.h + +gzio.obj: gzio.c zutil.h zlib.h zconf.h + +infback.obj: infback.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inffast.obj: inffast.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h + +inflate.obj: inflate.c zutil.h zlib.h zconf.h inftrees.h inflate.h \ + inffast.h inffixed.h + +inftrees.obj: inftrees.c zutil.h zlib.h zconf.h inftrees.h + +trees.obj: trees.c zutil.h zlib.h zconf.h deflate.h trees.h + +uncompr.obj: uncompr.c zlib.h zconf.h + +zutil.obj: zutil.c zutil.h zlib.h zconf.h + +example.obj: example.c zlib.h zconf.h + +minigzip.obj: minigzip.c zlib.h zconf.h + +zlib1.res: win32/zlib1.rc + $(RC) $(RCFLAGS) /fo$@ win32/zlib1.rc + + +# testing +test: example.exe minigzip.exe + example + echo hello world | minigzip | minigzip 
-d + +testdll: example_d.exe minigzip_d.exe + example_d + echo hello world | minigzip_d | minigzip_d -d + + +# cleanup +clean: + -del $(STATICLIB) + -del $(SHAREDLIB) + -del $(IMPLIB) + -del *.obj + -del *.res + -del *.exp + -del *.exe + -del foo.gz Added: external/zlib/win32/VisualC.txt ============================================================================== --- (empty file) +++ external/zlib/win32/VisualC.txt Tue Jan 3 07:42:59 2006 @@ -0,0 +1,3 @@ + +To build zlib using the Microsoft Visual C++ environment, +use the appropriate project from the projects/ directory. Added: external/zlib/win32/zlib.def ============================================================================== --- (empty file) +++ external/zlib/win32/zlib.def Tue Jan 3 07:42:59 2006 @@ -0,0 +1,60 @@ +LIBRARY +; zlib data compression library + +EXPORTS +; basic functions + zlibVersion + deflate + deflateEnd + inflate + inflateEnd +; advanced functions + deflateSetDictionary + deflateCopy + deflateReset + deflateParams + deflateBound + deflatePrime + inflateSetDictionary + inflateSync + inflateCopy + inflateReset + inflateBack + inflateBackEnd + zlibCompileFlags +; utility functions + compress + compress2 + compressBound + uncompress + gzopen + gzdopen + gzsetparams + gzread + gzwrite + gzprintf + gzputs + gzgets + gzputc + gzgetc + gzungetc + gzflush + gzseek + gzrewind + gztell + gzeof + gzclose + gzerror + gzclearerr +; checksum functions + adler32 + crc32 +; various hacks, don't look :) + deflateInit_ + deflateInit2_ + inflateInit_ + inflateInit2_ + inflateBackInit_ + inflateSyncPoint + get_crc_table + zError Added: external/zlib/win32/zlib1.rc ============================================================================== --- (empty file) +++ external/zlib/win32/zlib1.rc Tue Jan 3 07:42:59 2006 @@ -0,0 +1,39 @@ +#include + +#ifdef GCC_WINDRES +VS_VERSION_INFO VERSIONINFO +#else +VS_VERSION_INFO VERSIONINFO MOVEABLE IMPURE LOADONCALL DISCARDABLE +#endif + FILEVERSION 1,2,2,0 + 
PRODUCTVERSION 1,2,2,0 + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +#ifdef _DEBUG + FILEFLAGS 1 +#else + FILEFLAGS 0 +#endif + FILEOS VOS_DOS_WINDOWS32 + FILETYPE VFT_DLL + FILESUBTYPE 0 // not used +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904E4" + //language ID = U.S. English, char set = Windows, Multilingual + BEGIN + VALUE "FileDescription", "zlib data compression library\0" + VALUE "FileVersion", "1.2.3\0" + VALUE "InternalName", "zlib1.dll\0" + VALUE "LegalCopyright", "(C) 1995-2004 Jean-loup Gailly & Mark Adler\0" + VALUE "OriginalFilename", "zlib1.dll\0" + VALUE "ProductName", "zlib\0" + VALUE "ProductVersion", "1.2.3\0" + VALUE "Comments","DLL support by Alessandro Iacopetti & Gilles Vollant\0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x0409, 1252 + END +END Added: external/zlib/zconf.h ============================================================================== --- (empty file) +++ external/zlib/zconf.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,332 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. 
+ */ +#ifdef Z_PREFIX +# define deflateInit_ z_deflateInit_ +# define deflate z_deflate +# define deflateEnd z_deflateEnd +# define inflateInit_ z_inflateInit_ +# define inflate z_inflate +# define inflateEnd z_inflateEnd +# define deflateInit2_ z_deflateInit2_ +# define deflateSetDictionary z_deflateSetDictionary +# define deflateCopy z_deflateCopy +# define deflateReset z_deflateReset +# define deflateParams z_deflateParams +# define deflateBound z_deflateBound +# define deflatePrime z_deflatePrime +# define inflateInit2_ z_inflateInit2_ +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateCopy z_inflateCopy +# define inflateReset z_inflateReset +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# define uncompress z_uncompress +# define adler32 z_adler32 +# define crc32 z_crc32 +# define get_crc_table z_get_crc_table +# define zError z_zError + +# define alloc_func z_alloc_func +# define free_func z_free_func +# define in_func z_in_func +# define out_func z_out_func +# define Byte z_Byte +# define uInt z_uInt +# define uLong z_uLong +# define Bytef z_Bytef +# define charf z_charf +# define intf z_intf +# define uIntf z_uIntf +# define uLongf z_uLongf +# define voidpf z_voidpf +# define voidp z_voidp +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif 
+#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +/* Some Mac compilers merge all .h files incorrectly: */ +#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) +# define NO_DUMMY_DECL +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. 
For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus a few kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. 
+ */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include <windows.h> + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if 0 /* HAVE_UNISTD_H -- this line is updated by ./configure */ +# include <sys/types.h> /* for off_t */ +# include <unistd.h> /* for SEEK_* and off_t */ +# ifdef VMS +# include <unixio.h> /* for off_t */ +# endif +# define z_off_t off_t +#endif +#ifndef SEEK_SET +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. 
*/ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif +#ifndef z_off_t +# define z_off_t long +#endif + +#if defined(__OS400__) +# define NO_vsnprintf +#endif + +#if defined(__MVS__) +# define NO_vsnprintf +# ifdef FAR +# undef FAR +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) +# pragma map(deflateInit_,"DEIN") +# pragma map(deflateInit2_,"DEIN2") +# pragma map(deflateEnd,"DEEND") +# pragma map(deflateBound,"DEBND") +# pragma map(inflateInit_,"ININ") +# pragma map(inflateInit2_,"ININ2") +# pragma map(inflateEnd,"INEND") +# pragma map(inflateSync,"INSY") +# pragma map(inflateSetDictionary,"INSEDI") +# pragma map(compressBound,"CMBND") +# pragma map(inflate_table,"INTABL") +# pragma map(inflate_fast,"INFA") +# pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ Added: external/zlib/zconf.in.h ============================================================================== --- (empty file) +++ external/zlib/zconf.in.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,332 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. 
+ */ +#ifdef Z_PREFIX +# define deflateInit_ z_deflateInit_ +# define deflate z_deflate +# define deflateEnd z_deflateEnd +# define inflateInit_ z_inflateInit_ +# define inflate z_inflate +# define inflateEnd z_inflateEnd +# define deflateInit2_ z_deflateInit2_ +# define deflateSetDictionary z_deflateSetDictionary +# define deflateCopy z_deflateCopy +# define deflateReset z_deflateReset +# define deflateParams z_deflateParams +# define deflateBound z_deflateBound +# define deflatePrime z_deflatePrime +# define inflateInit2_ z_inflateInit2_ +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateCopy z_inflateCopy +# define inflateReset z_inflateReset +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# define uncompress z_uncompress +# define adler32 z_adler32 +# define crc32 z_crc32 +# define get_crc_table z_get_crc_table +# define zError z_zError + +# define alloc_func z_alloc_func +# define free_func z_free_func +# define in_func z_in_func +# define out_func z_out_func +# define Byte z_Byte +# define uInt z_uInt +# define uLong z_uLong +# define Bytef z_Bytef +# define charf z_charf +# define intf z_intf +# define uIntf z_uIntf +# define uLongf z_uLongf +# define voidpf z_voidpf +# define voidp z_voidp +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif 
+#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +/* Some Mac compilers merge all .h files incorrectly: */ +#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) +# define NO_DUMMY_DECL +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. 
For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus a few kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. 
+ */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include <windows.h> + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if 0 /* HAVE_UNISTD_H -- this line is updated by ./configure */ +# include <sys/types.h> /* for off_t */ +# include <unistd.h> /* for SEEK_* and off_t */ +# ifdef VMS +# include <unixio.h> /* for off_t */ +# endif +# define z_off_t off_t +#endif +#ifndef SEEK_SET +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. 
*/ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif +#ifndef z_off_t +# define z_off_t long +#endif + +#if defined(__OS400__) +# define NO_vsnprintf +#endif + +#if defined(__MVS__) +# define NO_vsnprintf +# ifdef FAR +# undef FAR +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) +# pragma map(deflateInit_,"DEIN") +# pragma map(deflateInit2_,"DEIN2") +# pragma map(deflateEnd,"DEEND") +# pragma map(deflateBound,"DEBND") +# pragma map(inflateInit_,"ININ") +# pragma map(inflateInit2_,"ININ2") +# pragma map(inflateEnd,"INEND") +# pragma map(inflateSync,"INSY") +# pragma map(inflateSetDictionary,"INSEDI") +# pragma map(compressBound,"CMBND") +# pragma map(inflate_table,"INTABL") +# pragma map(inflate_fast,"INFA") +# pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ Added: external/zlib/zlib.3 ============================================================================== --- (empty file) +++ external/zlib/zlib.3 Tue Jan 3 07:42:59 2006 @@ -0,0 +1,159 @@ +.TH ZLIB 3 "18 July 2005" +.SH NAME +zlib \- compression/decompression library +.SH SYNOPSIS +[see +.I zlib.h +for full description] +.SH DESCRIPTION +The +.I zlib +library is a general purpose data compression library. +The code is thread safe. +It provides in-memory compression and decompression functions, +including integrity checks of the uncompressed data. +This version of the library supports only one compression method (deflation) +but other algorithms will be added later +and will have the same stream interface. +.LP +Compression can be done in a single step if the buffers are large enough +(for example if an input file is mmap'ed), +or can be done by repeated calls of the compression function. +In the latter case, +the application must provide more input and/or consume the output +(providing more output space) before each call. 
+.LP +The library also supports reading and writing files in +.IR gzip (1) +(.gz) format +with an interface similar to that of stdio. +.LP +The library does not install any signal handler. +The decoder checks the consistency of the compressed data, +so the library should never crash even in case of corrupted input. +.LP +All functions of the compression library are documented in the file +.IR zlib.h . +The distribution source includes examples of use of the library +in the files +.I example.c +and +.IR minigzip.c . +.LP +Changes to this version are documented in the file +.I ChangeLog +that accompanies the source, +and are concerned primarily with bug fixes and portability enhancements. +.LP +A Java implementation of +.I zlib +is available in the Java Development Kit 1.1: +.IP +http://www.javasoft.com/products/JDK/1.1/docs/api/Package-java.util.zip.html +.LP +A Perl interface to +.IR zlib , +written by Paul Marquess (pmqs at cpan.org), +is available at CPAN (Comprehensive Perl Archive Network) sites, +including: +.IP +http://www.cpan.org/modules/by-module/Compress/ +.LP +A Python interface to +.IR zlib , +written by A.M. Kuchling (amk at magnet.com), +is available in Python 1.5 and later versions: +.IP +http://www.python.org/doc/lib/module-zlib.html +.LP +A +.I zlib +binding for +.IR tcl (1), +written by Andreas Kupries (a.kupries at westend.com), +is available at: +.IP +http://www.westend.com/~kupries/doc/trf/man/man.html +.LP +An experimental package to read and write files in .zip format, +written on top of +.I zlib +by Gilles Vollant (info at winimage.com), +is available at: +.IP +http://www.winimage.com/zLibDll/unzip.html +and also in the +.I contrib/minizip +directory of the main +.I zlib +web site. 
+.SH "SEE ALSO" +The +.I zlib +web site can be found at either of these locations: +.IP +http://www.zlib.org +.br +http://www.gzip.org/zlib/ +.LP +The data format used by the zlib library is described by RFC +(Request for Comments) 1950 to 1952 in the files: +.IP +http://www.ietf.org/rfc/rfc1950.txt (concerning zlib format) +.br +http://www.ietf.org/rfc/rfc1951.txt (concerning deflate format) +.br +http://www.ietf.org/rfc/rfc1952.txt (concerning gzip format) +.LP +These documents are also available in other formats from: +.IP +ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html +.LP +Mark Nelson (markn at ieee.org) wrote an article about +.I zlib +for the Jan. 1997 issue of Dr. Dobb's Journal; +a copy of the article is available at: +.IP +http://dogma.net/markn/articles/zlibtool/zlibtool.htm +.SH "REPORTING PROBLEMS" +Before reporting a problem, +please check the +.I zlib +web site to verify that you have the latest version of +.IR zlib ; +otherwise, +obtain the latest version and see if the problem still exists. +Please read the +.I zlib +FAQ at: +.IP +http://www.gzip.org/zlib/zlib_faq.html +.LP +before asking for help. +Send questions and/or comments to zlib at gzip.org, +or (for the Windows DLL version) to Gilles Vollant (info at winimage.com). +.SH AUTHORS +Version 1.2.3 +Copyright (C) 1995-2005 Jean-loup Gailly (jloup at gzip.org) +and Mark Adler (madler at alumni.caltech.edu). +.LP +This software is provided "as-is," +without any express or implied warranty. +In no event will the authors be held liable for any damages +arising from the use of this software. +See the distribution directory with respect to requirements +governing redistribution. +The deflate format used by +.I zlib +was defined by Phil Katz. +The deflate and +.I zlib +specifications were written by L. Peter Deutsch. +Thanks to all the people who reported problems and suggested various +improvements in +.IR zlib ; +who are too numerous to cite here. +.LP +UNIX manual page by R. P. C. 
Rodgers, +U.S. National Library of Medicine (rodgers at nlm.nih.gov). +.\" end of man page Added: external/zlib/zlib.h ============================================================================== --- (empty file) +++ external/zlib/zlib.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,1357 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.3, July 18th, 2005 + + Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup at gzip.org madler at alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt + (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.2.3" +#define ZLIB_VERNUM 0x1230 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed + data. 
This version of the library supports only one compression method + (deflation) but other algorithms will be added later and will have the same + stream interface. + + Compression can be done in a single step if the buffers are large + enough (for example if an input file is mmap'ed), or can be done by + repeated calls of the compression function. In the latter case, the + application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip streams in memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never + crash even in case of corrupted input. 
+*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total nb of input bytes read so far */ + + Bytef *next_out; /* next output byte should be put there */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total nb of bytes output so far */ + + char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text */ + uLong adler; /* adler32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. 
+*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has + dropped to zero. It must update next_out and avail_out when avail_out + has dropped to zero. The application must initialize zalloc, zfree and + opaque before calling the init function. All other fields are set by the + compression library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this + if the symbol MAXSEG_64K is defined (see zconf.h). 
WARNING: On MSDOS, + pointers returned by zalloc for objects of exactly 65536 bytes *must* + have their offset normalized to zero. The default allocation function + provided by this library ensures this (see zutil.c). To reduce memory + requirements and avoid any allocation of 64K objects, at the expense of + compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or + progress reports. After compression, total_in holds the total size of + the uncompressed data and may be saved for use in the decompressor + (particularly if the decompressor wants to decompress everything in + a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */ +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative + * values are errors, positive values are used for special but normal events. 
+ */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field (though see inflate()) */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion OF((void)); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is + not compatible with the zlib.h header file used by the application. + This check is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. + If zalloc and zfree are set to Z_NULL, deflateInit updates them to + use default allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at + all (the input data is simply copied a block at a time). + Z_DEFAULT_COMPRESSION requests a default compromise between speed and + compression (currently equivalent to level 6). 
+ + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if level is not a valid compression level, + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). + msg is set to null if there is no error message. deflateInit does not + perform any compression: this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce some + output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary (in interactive applications). + Some output may be provided even if flush is not set. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating avail_in or avail_out accordingly; avail_out + should never be zero before the call. The application can consume the + compressed output when it wants, for example when the output buffer is full + (avail_out == 0), or after each call of deflate(). 
If deflate returns Z_OK + and with zero avail_out, it must be called again after making room in the + output buffer because there might be more output pending. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In particular + avail_in is zero after the call if enough output space has been provided + before the call.) Flushing may degrade compression for some compression + algorithms and so it should be used only when necessary. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there + was enough output space; if deflate returns with Z_OK, this function must be + called again with Z_FINISH and more output space (updated avail_out) but no + more input data, until it returns with Z_STREAM_END or an error. After + deflate has returned Z_STREAM_END, the only possible operations on the + stream are deflateReset or deflateEnd.
+ + Z_FINISH can be used immediately after deflateInit if all the compression + is to be done in a single step. In this case, avail_out must be at least + the value returned by deflateBound (see below). If deflate does not return + Z_STREAM_END, then it must be called again as described above. + + deflate() sets strm->adler to the adler32 checksum of all input read + so far (that is, total_in bytes). + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered + binary. This field is only for information purposes and does not affect + the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible + (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not + fatal, and deflate() can be called again with more input and more output + space to continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, + msg may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. 
If next_in is not Z_NULL and avail_in is large enough (the exact + value depends on the compression method), inflateInit determines the + compression method from the zlib header and allocates all data structures + accordingly; otherwise the allocation will be deferred to the first call of + inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to + use default allocation functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller. msg is set to null if there is no error + message. inflateInit does not perform any decompression apart from reading + the zlib header if present: this will be done by inflate(). (So next_in and + avail_in may be modified, but next_out and avail_out are unchanged.) +*/ + + +ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in is updated and processing + will resume at this point for the next call of inflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there + is no more input data or no more space in the output buffer (see below + about the flush parameter). 
+ + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating the next_* and avail_* values accordingly. + The application can consume the uncompressed output when it wants, for + example when the output buffer is full (avail_out == 0), or after each + call of inflate(). If inflate returns Z_OK and with zero avail_out, it + must be called again after making room in the output buffer because there + might be more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, + Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() stop + if and when it gets to the next deflate block boundary. When decoding the + zlib or gzip format, this will cause inflate() to return immediately after + the header and before the first block. When doing a raw inflate, inflate() + will go ahead and process the first block, and will return when it gets to + the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + Also to assist in this, on return inflate() will set strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 + if inflate() is currently decoding the last block in the deflate stream, + plus 128 if inflate() returned immediately after decoding an end-of-block + code or decoding the complete header up to just before the first byte of the + deflate stream. The end-of-block will not be indicated until all of the + uncompressed data from that block has been written to strm->next_out. The + number of unused bits may in general be greater than seven, except when + bit 7 of data_type is set, in which case the number of unused bits will be + less than eight. 
+ + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step + (a single call of inflate), the parameter flush should be set to + Z_FINISH. In this case all pending input is processed and all pending + output is flushed; avail_out must be large enough to hold all the + uncompressed data. (The size of the uncompressed data may have been saved + by the compressor for this purpose.) The next operation on this stream must + be inflateEnd to deallocate the decompression state. The use of Z_FINISH + is never required, but can be used to inform inflate that a faster approach + may be used for the single inflate() call. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the only effect of the flush parameter in this implementation + is on the return value of inflate(), as noted below, or when it returns early + because Z_BLOCK is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the adler32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the adler32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed adler32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() will decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically. 
Any information + contained in the gzip header is not retained, so applications that need that + information should instead use raw inflate, see inflateInit2() below, or + inflateBack() and perform their own processing of the gzip header and + trailer. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value), Z_STREAM_ERROR if the stream structure was inconsistent (for example + if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory, + Z_BUF_ERROR if no progress is possible or if there was not enough room in the + output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may then + call inflateSync() to look for a good compression block if a partial recovery + of the data is desired. +*/ + + +ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. + + inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state + was inconsistent. In the error case, msg may be set but then points to a + static string (which must not be deallocated). +*/ + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. 
The + fields next_in, zalloc, zfree and opaque must be initialized before by + the caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute an adler32 check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), + no header crc, and the operating system will be set to 255 (unknown). If a + gzip stream is being written, strm->adler is a crc32 instead of an adler32. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but + is slow and reduces compression ratio; memLevel=9 uses maximum memory + for optimal speed. The default value is 8. See zconf.h for total memory + usage as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. 
In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as + Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy + parameter only affects the compression ratio but not the correctness of the + compressed output even if it is not set appropriately. Z_FIXED prevents the + use of dynamic Huffman codes, allowing for a simpler decoder for special + applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid + method). msg is set to null if there is no error message. deflateInit2 does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. This function must be called + immediately after deflateInit, deflateInit2 or deflateReset, before any + call of deflate. The compressor and decompressor must use exactly the same + dictionary (see inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary.
+ + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size in + deflateInit or deflateInit2. Thus the strings most likely to be useful should be + put at the end of the dictionary, not at the front. In addition, the + current implementation of deflate will use at most the window size minus + 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the adler32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The adler32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + adler32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (such as NULL dictionary) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if the compression method is bsort). deflateSetDictionary does not + perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and + can consume lots of memory.
+ + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, + but does not free and reallocate all the internal compression state. + The stream will keep the same compression level and any other attributes + that may have been set by deflateInit2. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, + int level, + int strategy)); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2. This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different + strategy. If the compression level is changed, the input available so far + is compressed with the old level (and may be flushed); the new level will + take effect only at the next call of deflate(). + + Before the call of deflateParams, the stream state must be set as for + a call of deflate(), since the currently available input may have to + be compressed and flushed. In particular, strm->avail_out must be non-zero. + + deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source + stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR + if strm->avail_out was zero. +*/ + +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. 
This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() + or deflateInit2(). This would be used to allocate an output buffer + for deflation in a single pass, and so would be called before deflate(). +*/ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the + bits leftover from a previous deflate stream when appending to it. As such, + this function can only be used for raw deflate, and must be used before the + first deflate() call after a deflateInit2() or deflateReset(). bits must be + less than or equal to 16, and that many of the least significant bits of + value will be inserted in the output. + + deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). 
The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, + int windowBits)); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. 
inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an adler32 or a crc32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is + a crc32 instead of an adler32. + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg + is set to null if there is no error message. inflateInit2 does not perform + any decompression apart from reading the zlib header if present: this will + be done by inflate(). (So next_in and avail_in may be modified, but next_out + and avail_out are unchanged.) +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the adler32 value returned by that call of inflate. 
+ The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called + immediately after inflateInit2() or inflateReset() and before any call of + inflate() to set the dictionary. The application must ensure that the + dictionary that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (such as NULL dictionary) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect adler32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). +*/ + +ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +/* + Skips invalid compressed data until a full flush point (see above the + description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR + if no more input was provided, Z_DATA_ERROR if no flush point has been found, + or Z_STREAM_ERROR if the stream structure was inconsistent. In the success + case, the application may save the current value of total_in which + indicates where valid compressed data was found. In the error case, the + application may repeatedly call inflateSync, providing more input each time, + until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream. The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. 
+ + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate all the internal decompression state. + The stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being NULL). +*/ + +ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, + gz_headerp head)); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. 
Note that Z_BLOCK can be used to + force inflate() to return immediately after header processing is complete + and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When + any of extra, name, or comment are not Z_NULL and the respective field is + not present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. 
The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not + be allocated, or Z_VERSION_ERROR if the version of the library does not + match the version of the header file. +*/ + +typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is more efficient than inflate() for + file i/o applications in that it avoids copying between the output and the + sliding window by simply making the window itself the output buffer. This + function trusts the application to not change the output buffer passed by + the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free + the allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. 
+ This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects + only the raw deflate stream to decompress. This is different from the + normal behavior of inflate(), which expects either a zlib or gzip header and + trailer around the deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero--buf is ignored in that + case--and inflateBack() will return a buffer error. inflateBack() will call + out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. out() + should return zero on success, or non-zero on failure. If out() returns + non-zero, inflateBack() will return with an error. Neither in() nor out() + are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. 
If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() are passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call. The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format + error in the deflate stream (in which case strm->msg is set to indicate the + nature of the error), or Z_STREAM_ERROR if the stream was not properly + initialized. In the case of Z_BUF_ERROR, an input or output error can be + distinguished using strm->next_in which will be Z_NULL only if in() returned + an error. If strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to + out() returning non-zero. (in() will always be called before out(), so + strm->next_in is assured to be defined if out() returns non-zero.) Note + that inflateBack() cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); +/* Return flags indicating compile-time options. 
+ + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + + + /* utility functions */ + +/* + The following utility functions are implemented on top of the + basic stream-oriented functions. To simplify the interface, some + default options are assumed (compression level and memory usage, + standard memory allocation functions). The source code of these + utility functions can easily be modified if you need special options. +*/ + +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Compresses the source buffer into the destination buffer. 
sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least the value returned + by compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + This function can be used to compress a whole file at once if the + input file is mmap'ed. + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level)); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before + a compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. 
(The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the uncompressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. +*/ + + +typedef voidp gzFile; + +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); +/* + Opens a gzip (.gz) file for reading or writing. The mode parameter + is as in fopen ("rb" or "wb") but can also include a compression level + ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for + Huffman only compression as in "wb1h", or 'R' for run-length encoding + as in "wb1R". (See the description of deflateInit2 for more information + about the strategy parameter.) + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. + + gzopen returns NULL if the file could not be opened or if there was + insufficient memory to allocate the (de)compression state; errno + can be checked to distinguish the two cases (if errno is zero, the + zlib error is Z_MEM_ERROR). */ + +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen() associates a gzFile with the file descriptor fd. File + descriptors are obtained from calls like open, dup, creat, pipe or + fileno (if the file has been previously opened with fopen). + The mode parameter is as in gzopen. + The next call of gzclose on the returned gzFile will also close the + file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file + descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode). 
+ gzdopen returns NULL if there was insufficient memory to allocate + the (de)compression state. +*/ + +ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); +/* + Dynamically update the compression level or strategy. See the description + of deflateInit2 for the meaning of these parameters. + gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not + opened for writing. +*/ + +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +/* + Reads the given number of uncompressed bytes from the compressed file. + If the input file was not in gzip format, gzread copies the given number + of bytes into the buffer. + gzread returns the number of uncompressed bytes actually read (0 for + end of file, -1 for error). */ + +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); +/* + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes actually written + (0 in case of error). +*/ + +ZEXTERN int ZEXPORTVA gzprintf OF((gzFile file, const char *format, ...)); +/* + Converts, formats, and writes the args to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written (0 in case of error). The number of + uncompressed bytes written is limited to 4095. The caller should assure that + this limit is not exceeded. If it is exceeded, then gzprintf() will return + an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf() + because the secure snprintf() or vsnprintf() functions were not available. +*/ + +ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); +/* + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. 
+ gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); +/* + Reads bytes from the compressed file until len-1 characters are read, or + a newline character is read and transferred to buf, or an end-of-file + condition is encountered. The string is then terminated with a null + character. + gzgets returns buf, or Z_NULL in case of error. +*/ + +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +/* + Writes c, converted to an unsigned char, into the compressed file. + gzputc returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +/* + Reads one byte from the compressed file. gzgetc returns this byte + or -1 in case of end of file or error. +*/ + +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +/* + Push one character back onto the stream to be read again later. + Only one character of push-back is allowed. gzungetc() returns the + character pushed, or -1 on failure. gzungetc() will fail if a + character has been pushed but not read yet, or if c is -1. The pushed + character will be discarded if the stream is repositioned with gzseek() + or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); +/* + Flushes all pending output into the compressed file. The parameter + flush is as in the deflate() function. The return value is the zlib + error number (see function gzerror below). gzflush returns Z_OK if + the flush parameter is Z_FINISH and all output could be flushed. + gzflush should be called only when strictly necessary because it can + degrade compression. +*/ + +ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, + z_off_t offset, int whence)); +/* + Sets the starting position for the next gzread or gzwrite on the + given compressed file. The offset represents a number of bytes in the + uncompressed data stream. 
The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); +/* + Rewinds the given file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) +*/ + +ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); +/* + Returns the starting position for the next gzread or gzwrite on the + given compressed file. This position represents a number of bytes in the + uncompressed data stream. + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +ZEXTERN int ZEXPORT gzeof OF((gzFile file)); +/* + Returns 1 when EOF has previously been detected reading the given + input stream, otherwise zero. +*/ + +ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); +/* + Returns 1 if file is being read directly without decompression, otherwise + zero. +*/ + +ZEXTERN int ZEXPORT gzclose OF((gzFile file)); +/* + Flushes all pending output if necessary, closes the compressed file + and deallocates all the (de)compression state. The return value is the zlib + error number (see function gzerror below). +*/ + +ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); +/* + Returns the error message for the last error which occurred on the + given compressed file. errnum is set to zlib error number. 
If an + error occurred in the file system and not in the compression library, + errnum is set to Z_ERRNO and the application may consult errno + to get the exact error code. +*/ + +ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); +/* + Clears the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the + compression library. +*/ + +ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is NULL, this function returns + the required initial value for the checksum. + An Adler-32 checksum is almost as reliable as a CRC32 but can be computed + much faster. Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, + z_off_t len2)); +/* + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. +*/ + +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is NULL, this function returns the required initial + value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application. 
+ Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); + +/* + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. +*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size)); +ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); +#define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream)) +#define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, sizeof(z_stream)) +#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, sizeof(z_stream)) +#define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream)) +#define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, sizeof(z_stream)) + 
+ +#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL) + struct internal_state {int dummy;}; /* hack for buggy compilers */ +#endif + +ZEXTERN const char * ZEXPORT zError OF((int)); +ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z)); +ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void)); + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ Added: external/zlib/zutil.c ============================================================================== --- (empty file) +++ external/zlib/zutil.c Tue Jan 3 07:42:59 2006 @@ -0,0 +1,318 @@ +/* zutil.c -- target dependent utility functions for the compression library + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" + +#ifndef NO_DUMMY_DECL +struct internal_state {int dummy;}; /* for buggy compilers */ +#endif + +const char * const z_errmsg[10] = { +"need dictionary", /* Z_NEED_DICT 2 */ +"stream end", /* Z_STREAM_END 1 */ +"", /* Z_OK 0 */ +"file error", /* Z_ERRNO (-1) */ +"stream error", /* Z_STREAM_ERROR (-2) */ +"data error", /* Z_DATA_ERROR (-3) */ +"insufficient memory", /* Z_MEM_ERROR (-4) */ +"buffer error", /* Z_BUF_ERROR (-5) */ +"incompatible version",/* Z_VERSION_ERROR (-6) */ +""}; + + +const char * ZEXPORT zlibVersion() +{ + return ZLIB_VERSION; +} + +uLong ZEXPORT zlibCompileFlags() +{ + uLong flags; + + flags = 0; + switch (sizeof(uInt)) { + case 2: break; + case 4: flags += 1; break; + case 8: flags += 2; break; + default: flags += 3; + } + switch (sizeof(uLong)) { + case 2: break; + case 4: flags += 1 << 2; break; + case 8: flags += 2 << 2; break; + default: flags += 3 << 2; + } + switch (sizeof(voidpf)) { + case 2: break; + case 4: flags += 1 << 4; break; + case 8: flags += 2 << 4; break; + default: flags += 3 << 4; + } + switch (sizeof(z_off_t)) { + case 2: break; + case 4: flags += 1 << 6; break; + case 8: flags += 2 << 6; break; + default: flags += 3 << 6; + } +#ifdef DEBUG + flags 
+= 1 << 8; +#endif +#if defined(ASMV) || defined(ASMINF) + flags += 1 << 9; +#endif +#ifdef ZLIB_WINAPI + flags += 1 << 10; +#endif +#ifdef BUILDFIXED + flags += 1 << 12; +#endif +#ifdef DYNAMIC_CRC_TABLE + flags += 1 << 13; +#endif +#ifdef NO_GZCOMPRESS + flags += 1L << 16; +#endif +#ifdef NO_GZIP + flags += 1L << 17; +#endif +#ifdef PKZIP_BUG_WORKAROUND + flags += 1L << 20; +#endif +#ifdef FASTEST + flags += 1L << 21; +#endif +#ifdef STDC +# ifdef NO_vsnprintf + flags += 1L << 25; +# ifdef HAS_vsprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_vsnprintf_void + flags += 1L << 26; +# endif +# endif +#else + flags += 1L << 24; +# ifdef NO_snprintf + flags += 1L << 25; +# ifdef HAS_sprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_snprintf_void + flags += 1L << 26; +# endif +# endif +#endif + return flags; +} + +#ifdef DEBUG + +# ifndef verbose +# define verbose 0 +# endif +int z_verbose = verbose; + +void z_error (m) + char *m; +{ + fprintf(stderr, "%s\n", m); + exit(1); +} +#endif + +/* exported to allow conversion of error code to string for compress() and + * uncompress() + */ +const char * ZEXPORT zError(err) + int err; +{ + return ERR_MSG(err); +} + +#if defined(_WIN32_WCE) + /* The Microsoft C Run-Time Library for Windows CE doesn't have + * errno. We define it as a global variable to simplify porting. + * Its value is always 0 and should not be used. + */ + int errno = 0; +#endif + +#ifndef HAVE_MEMCPY + +void zmemcpy(dest, source, len) + Bytef* dest; + const Bytef* source; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = *source++; /* ??? to be unrolled */ + } while (--len != 0); +} + +int zmemcmp(s1, s2, len) + const Bytef* s1; + const Bytef* s2; + uInt len; +{ + uInt j; + + for (j = 0; j < len; j++) { + if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; + } + return 0; +} + +void zmemzero(dest, len) + Bytef* dest; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = 0; /* ??? 
to be unrolled */ + } while (--len != 0); +} +#endif + + +#ifdef SYS16BIT + +#ifdef __TURBOC__ +/* Turbo C in 16-bit mode */ + +# define MY_ZCALLOC + +/* Turbo C malloc() does not allow dynamic allocation of 64K bytes + * and farmalloc(64K) returns a pointer with an offset of 8, so we + * must fix the pointer. Warning: the pointer must be put back to its + * original form in order to free it, use zcfree(). + */ + +#define MAX_PTR 10 +/* 10*64K = 640K */ + +local int next_ptr = 0; + +typedef struct ptr_table_s { + voidpf org_ptr; + voidpf new_ptr; +} ptr_table; + +local ptr_table table[MAX_PTR]; +/* This table is used to remember the original form of pointers + * to large buffers (64K). Such pointers are normalized with a zero offset. + * Since MSDOS is not a preemptive multitasking OS, this table is not + * protected from concurrent access. This hack doesn't work anyway on + * a protected system like OS/2. Use Microsoft C instead. + */ + +voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + voidpf buf = opaque; /* just to make some compilers happy */ + ulg bsize = (ulg)items*size; + + /* If we allocate less than 65520 bytes, we assume that farmalloc + * will return a usable pointer which doesn't have to be normalized. 
+ */ + if (bsize < 65520L) { + buf = farmalloc(bsize); + if (*(ush*)&buf != 0) return buf; + } else { + buf = farmalloc(bsize + 16L); + } + if (buf == NULL || next_ptr >= MAX_PTR) return NULL; + table[next_ptr].org_ptr = buf; + + /* Normalize the pointer to seg:0 */ + *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; + *(ush*)&buf = 0; + table[next_ptr++].new_ptr = buf; + return buf; +} + +void zcfree (voidpf opaque, voidpf ptr) +{ + int n; + if (*(ush*)&ptr != 0) { /* object < 64K */ + farfree(ptr); + return; + } + /* Find the original pointer */ + for (n = 0; n < next_ptr; n++) { + if (ptr != table[n].new_ptr) continue; + + farfree(table[n].org_ptr); + while (++n < next_ptr) { + table[n-1] = table[n]; + } + next_ptr--; + return; + } + ptr = opaque; /* just to make some compilers happy */ + Assert(0, "zcfree: ptr not found"); +} + +#endif /* __TURBOC__ */ + + +#ifdef M_I86 +/* Microsoft C in 16-bit mode */ + +# define MY_ZCALLOC + +#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) +# define _halloc halloc +# define _hfree hfree +#endif + +voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + if (opaque) opaque = 0; /* to make compiler happy */ + return _halloc((long)items, size); +} + +void zcfree (voidpf opaque, voidpf ptr) +{ + if (opaque) opaque = 0; /* to make compiler happy */ + _hfree(ptr); +} + +#endif /* M_I86 */ + +#endif /* SYS16BIT */ + + +#ifndef MY_ZCALLOC /* Any system without a special alloc function */ + +#ifndef STDC +extern voidp malloc OF((uInt size)); +extern voidp calloc OF((uInt items, uInt size)); +extern void free OF((voidpf ptr)); +#endif + +voidpf zcalloc (opaque, items, size) + voidpf opaque; + unsigned items; + unsigned size; +{ + if (opaque) items += size - size; /* make compiler happy */ + return sizeof(uInt) > 2 ? 
(voidpf)malloc(items * size) : + (voidpf)calloc(items, size); +} + +void zcfree (opaque, ptr) + voidpf opaque; + voidpf ptr; +{ + free(ptr); + if (opaque) return; /* make compiler happy */ +} + +#endif /* MY_ZCALLOC */ Added: external/zlib/zutil.h ============================================================================== --- (empty file) +++ external/zlib/zutil.h Tue Jan 3 07:42:59 2006 @@ -0,0 +1,269 @@ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef ZUTIL_H +#define ZUTIL_H + +#define ZLIB_INTERNAL +#include "zlib.h" + +#ifdef STDC +# ifndef _WIN32_WCE +# include +# endif +# include +# include +#endif +#ifdef NO_ERRNO_H +# ifdef _WIN32_WCE + /* The Microsoft C Run-Time Library for Windows CE doesn't have + * errno. We define it as a global variable to simplify porting. + * Its value is always 0 and should not be used. We rename it to + * avoid conflict with other libraries that use the same workaround. 
+ */ +# define errno z_errno +# endif + extern int errno; +#else +# ifndef _WIN32_WCE +# include +# endif +#endif + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] + +#define ERR_RETURN(strm,err) \ + return (strm->msg = (char*)ERR_MSG(err), (err)) +/* To be used only when the state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + +#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) +# define OS_CODE 0x00 +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include +# endif +# else /* MSC or DJGPP */ +# include +# endif +#endif + +#ifdef AMIGA +# define OS_CODE 0x01 +#endif + +#if defined(VAXC) || defined(VMS) +# define OS_CODE 0x02 +# define F_OPEN(name, mode) \ + fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") +#endif + +#if defined(ATARI) || defined(atarist) +# define OS_CODE 
0x05 +#endif + +#ifdef OS2 +# define OS_CODE 0x06 +# ifdef M_I86 + #include +# endif +#endif + +#if defined(MACOS) || defined(TARGET_OS_MAC) +# define OS_CODE 0x07 +# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include /* for fdopen */ +# else +# ifndef fdopen +# define fdopen(fd,mode) NULL /* No fdopen() */ +# endif +# endif +#endif + +#ifdef TOPS20 +# define OS_CODE 0x0a +#endif + +#ifdef WIN32 +# ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */ +# define OS_CODE 0x0b +# endif +#endif + +#ifdef __50SERIES /* Prime/PRIMOS */ +# define OS_CODE 0x0f +#endif + +#if defined(_BEOS_) || defined(RISCOS) +# define fdopen(fd,mode) NULL /* No fdopen() */ +#endif + +#if (defined(_MSC_VER) && (_MSC_VER > 600)) +# if defined(_WIN32_WCE) +# define fdopen(fd,mode) NULL /* No fdopen() */ +# ifndef _PTRDIFF_T_DEFINED + typedef int ptrdiff_t; +# define _PTRDIFF_T_DEFINED +# endif +# else +# define fdopen(fd,type) _fdopen(fd,type) +# endif +#endif + + /* common defaults */ + +#ifndef OS_CODE +# define OS_CODE 0x03 /* assume Unix */ +#endif + +#ifndef F_OPEN +# define F_OPEN(name, mode) fopen((name), (mode)) +#endif + + /* functions */ + +#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif +#if defined(__CYGWIN__) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif +#ifndef HAVE_VSNPRINTF +# ifdef MSDOS + /* vsnprintf may exist on some MS-DOS compilers (DJGPP?), + but for now we just assume it doesn't. */ +# define NO_vsnprintf +# endif +# ifdef __TURBOC__ +# define NO_vsnprintf +# endif +# ifdef WIN32 + /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. 
*/ +# if !defined(vsnprintf) && !defined(NO_vsnprintf) +# define vsnprintf _vsnprintf +# endif +# endif +# ifdef __SASC +# define NO_vsnprintf +# endif +#endif +#ifdef VMS +# define NO_vsnprintf +#endif + +#if defined(pyr) +# define NO_MEMCPY +#endif +#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) + /* Use our own functions for small and medium model with MSC <= 5.0. + * You may have to use the same strategy for Borland C (untested). + * The __SC__ check is for Symantec. + */ +# define NO_MEMCPY +#endif +#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) +# define HAVE_MEMCPY +#endif +#ifdef HAVE_MEMCPY +# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ +# define zmemcpy _fmemcpy +# define zmemcmp _fmemcmp +# define zmemzero(dest, len) _fmemset(dest, 0, len) +# else +# define zmemcpy memcpy +# define zmemcmp memcmp +# define zmemzero(dest, len) memset(dest, 0, len) +# endif +#else + extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); + extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); + extern void zmemzero OF((Bytef* dest, uInt len)); +#endif + +/* Diagnostic functions */ +#ifdef DEBUG +# include + extern int z_verbose; + extern void z_error OF((char *m)); +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) {if (z_verbose>=0) fprintf x ;} +# define Tracev(x) {if (z_verbose>0) fprintf x ;} +# define Tracevv(x) {if (z_verbose>1) fprintf x ;} +# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} +# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + + +voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); +void zcfree OF((voidpf opaque, voidpf ptr)); + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr) 
(*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + +#endif /* ZUTIL_H */ From python-checkins at python.org Tue Jan 3 07:44:21 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 07:44:21 +0100 (CET) Subject: [Python-checkins] commit of r41893 - external/zlib-1.2.3 Message-ID: <20060103064421.3198A1E4002@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 07:44:20 2006 New Revision: 41893 Added: external/zlib-1.2.3/ - copied from r41892, external/zlib/ Log: Tag zlib 1.2.3. From python-checkins at python.org Tue Jan 3 07:44:59 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 07:44:59 +0100 (CET) Subject: [Python-checkins] commit of r41894 - python/trunk/Modules/zlib Message-ID: <20060103064459.6160C1E4007@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 07:44:59 2006 New Revision: 41894 Added: python/trunk/Modules/zlib/ - copied from r41893, external/zlib/ Log: Import zlib into Python tree. From python-checkins at python.org Tue Jan 3 07:48:39 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 07:48:39 +0100 (CET) Subject: [Python-checkins] commit of r41895 - in python/trunk/Modules/zlib: amiga as400 contrib examples msdos old projects qnx win32 Message-ID: <20060103064839.411C11E4002@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 07:48:38 2006 New Revision: 41895 Removed: python/trunk/Modules/zlib/amiga/ python/trunk/Modules/zlib/as400/ python/trunk/Modules/zlib/contrib/ python/trunk/Modules/zlib/examples/ python/trunk/Modules/zlib/msdos/ python/trunk/Modules/zlib/old/ python/trunk/Modules/zlib/projects/ python/trunk/Modules/zlib/qnx/ python/trunk/Modules/zlib/win32/ Log: Remove unneeded directories. 
From python-checkins at python.org Tue Jan 3 08:10:15 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 08:10:15 +0100 (CET) Subject: [Python-checkins] commit of r41896 - in python/trunk: PC/config.c PCbuild/pcbuild.sln PCbuild/pythoncore.vcproj PCbuild/readme.txt PCbuild/zlib.vcproj Tools/msi/msi.py Message-ID: <20060103071015.DD8161E4002@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 08:10:14 2006 New Revision: 41896 Removed: python/trunk/PCbuild/zlib.vcproj Modified: python/trunk/PC/config.c python/trunk/PCbuild/pcbuild.sln python/trunk/PCbuild/pythoncore.vcproj python/trunk/PCbuild/readme.txt python/trunk/Tools/msi/msi.py Log: Make zlib builtin. Modified: python/trunk/PC/config.c ============================================================================== --- python/trunk/PC/config.c (original) +++ python/trunk/PC/config.c Tue Jan 3 08:10:14 2006 @@ -56,6 +56,7 @@ extern void init_winreg(void); extern void initdatetime(void); extern void initfunctional(void); +extern void initzlib(void); extern void init_multibytecodec(void); extern void init_codecs_cn(void); @@ -133,7 +134,8 @@ {"xxsubtype", initxxsubtype}, {"zipimport", initzipimport}, - + {"zlib", initzlib}, + /* CJK codecs */ {"_multibytecodec", init_multibytecodec}, {"_codecs_cn", init_codecs_cn}, Modified: python/trunk/PCbuild/pcbuild.sln ============================================================================== Binary files. No diff available. 
Modified: python/trunk/PCbuild/pythoncore.vcproj ============================================================================== --- python/trunk/PCbuild/pythoncore.vcproj (original) +++ python/trunk/PCbuild/pythoncore.vcproj Tue Jan 3 08:10:14 2006 @@ -226,6 +226,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Modified: python/trunk/PCbuild/readme.txt ============================================================================== --- python/trunk/PCbuild/readme.txt (original) +++ python/trunk/PCbuild/readme.txt Tue Jan 3 08:10:14 2006 @@ -124,21 +124,6 @@ nmake -f makefile.vc nmake -f makefile.vc install -zlib - Python wrapper for the zlib compression library. Get the source code - for version 1.2.3 from a convenient mirror at: - http://www.gzip.org/zlib/ - Unpack into dist\zlib-1.2.3. - A custom pre-link step in the zlib project settings should manage to - build zlib-1.2.3\zlib.lib by magic before zlib.pyd (or zlib_d.pyd) is - linked in PCbuild\. - However, the zlib project is not smart enough to remove anything under - zlib-1.2.3\ when you do a clean, so if you want to rebuild zlib.lib - you need to clean up zlib-1.2.3\ by hand. - When building zlib.pyd for Itanium, the pre-link build step won't work, - so you will need to invoke nmake manually, using an IA64 build - environment. - bz2 Python wrapper for the libbz2 compression library. 
Homepage http://sources.redhat.com/bzip2/ Deleted: /python/trunk/PCbuild/zlib.vcproj ============================================================================== --- /python/trunk/PCbuild/zlib.vcproj Tue Jan 3 08:10:14 2006 +++ (empty file) @@ -1,245 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Modified: python/trunk/Tools/msi/msi.py ============================================================================== --- python/trunk/Tools/msi/msi.py (original) +++ python/trunk/Tools/msi/msi.py Tue Jan 3 08:10:14 2006 @@ -103,7 +103,6 @@ 'select.pyd', 'unicodedata.pyd', 'winsound.pyd', - 'zlib.pyd', '_elementtree.pyd', '_bsddb.pyd', '_socket.pyd', @@ -112,15 +111,9 @@ '_tkinter.pyd', ] -if major+minor <= "23": +if major+minor <= "24": extensions.extend([ - '_csv.pyd', - '_sre.pyd', - '_symtable.pyd', - '_winreg.pyd', - 'datetime.pyd' - 'mmap.pyd', - 'parser.pyd', + 'zlib.pyd', ]) # Well-known component UUIDs From python-checkins at python.org Tue Jan 3 08:38:56 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 08:38:56 +0100 (CET) Subject: [Python-checkins] commit of r41897 - python/trunk/PCbuild/_bsddb.vcproj python/trunk/PCbuild/_socket.vcproj python/trunk/PCbuild/_testcapi.vcproj python/trunk/PCbuild/_tkinter.vcproj python/trunk/PCbuild/bz2.vcproj python/trunk/PCbuild/make_versioninfo.vcproj python/trunk/PCbuild/pyexpat.vcproj python/trunk/PCbuild/python.vcproj python/trunk/PCbuild/pythoncore.vcproj python/trunk/PCbuild/pythonw.vcproj python/trunk/PCbuild/select.vcproj python/trunk/PCbuild/unicodedata.vcproj python/trunk/PCbuild/w9xpopen.vcproj python/trunk/PCbuild/winsound.vcproj Message-ID: <20060103073856.051941E4002@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 08:38:51 2006 New Revision: 41897 Modified: python/trunk/PCbuild/_bsddb.vcproj python/trunk/PCbuild/_socket.vcproj python/trunk/PCbuild/_testcapi.vcproj 
python/trunk/PCbuild/_tkinter.vcproj python/trunk/PCbuild/bz2.vcproj python/trunk/PCbuild/make_versioninfo.vcproj python/trunk/PCbuild/pyexpat.vcproj python/trunk/PCbuild/python.vcproj python/trunk/PCbuild/pythoncore.vcproj python/trunk/PCbuild/pythonw.vcproj python/trunk/PCbuild/select.vcproj python/trunk/PCbuild/unicodedata.vcproj python/trunk/PCbuild/w9xpopen.vcproj python/trunk/PCbuild/winsound.vcproj Log: Patch #1307806: Use project defaults where possible Modified: python/trunk/PCbuild/_bsddb.vcproj ============================================================================== --- python/trunk/PCbuild/_bsddb.vcproj (original) +++ python/trunk/PCbuild/_bsddb.vcproj Tue Jan 3 08:38:51 2006 @@ -24,10 +24,6 @@ PreprocessorDefinitions="_DEBUG;WIN32;_WINDOWS" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\_bsddb/_bsddb.pch" - AssemblerListingLocation=".\x86-temp-debug\_bsddb/" - ObjectFile=".\x86-temp-debug\_bsddb/" - ProgramDataBaseFileName=".\x86-temp-debug\_bsddb/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -48,13 +44,7 @@ ImportLibrary=".\./_bsddb_d.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/trunk/PCbuild/_socket.vcproj ============================================================================== --- python/trunk/PCbuild/_socket.vcproj (original) +++ python/trunk/PCbuild/_socket.vcproj Tue Jan 3 08:38:51 2006 @@ -24,10 +24,6 @@ PreprocessorDefinitions="_DEBUG;WIN32;_WINDOWS" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\_socket/_socket.pch" - AssemblerListingLocation=".\x86-temp-debug\_socket/" - ObjectFile=".\x86-temp-debug\_socket/" - ProgramDataBaseFileName=".\x86-temp-debug\_socket/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -47,13 
+43,7 @@ ImportLibrary=".\./_socket_d.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/trunk/PCbuild/_testcapi.vcproj ============================================================================== --- python/trunk/PCbuild/_testcapi.vcproj (original) +++ python/trunk/PCbuild/_testcapi.vcproj Tue Jan 3 08:38:51 2006 @@ -28,10 +28,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\_testcapi/_testcapi.pch" - AssemblerListingLocation=".\x86-temp-release\_testcapi/" - ObjectFile=".\x86-temp-release\_testcapi/" - ProgramDataBaseFileName=".\x86-temp-release\_testcapi/" WarningLevel="3" SuppressStartupBanner="TRUE" CompileAs="0"/> @@ -47,13 +43,7 @@ ImportLibrary=".\./_testcapi.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/trunk/PCbuild/_tkinter.vcproj ============================================================================== --- python/trunk/PCbuild/_tkinter.vcproj (original) +++ python/trunk/PCbuild/_tkinter.vcproj Tue Jan 3 08:38:51 2006 @@ -27,10 +27,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\_tkinter/_tkinter.pch" - AssemblerListingLocation=".\x86-temp-release\_tkinter/" - ObjectFile=".\x86-temp-release\_tkinter/" - ProgramDataBaseFileName=".\x86-temp-release\_tkinter/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -51,13 +47,7 @@ ImportLibrary=".\./_tkinter.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + 
Name="VCResourceCompilerTool"/> - - - - - - - - - - - - - - - - - - Modified: python/trunk/PCbuild/bz2.vcproj ============================================================================== --- python/trunk/PCbuild/bz2.vcproj (original) +++ python/trunk/PCbuild/bz2.vcproj Tue Jan 3 08:38:51 2006 @@ -25,10 +25,6 @@ PreprocessorDefinitions="_DEBUG;WIN32;_WINDOWS" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\bz2/bz2.pch" - AssemblerListingLocation=".\x86-temp-debug\bz2/" - ObjectFile=".\x86-temp-debug\bz2/" - ProgramDataBaseFileName=".\x86-temp-debug\bz2/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -49,13 +45,7 @@ ImportLibrary=".\./bz2_d.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/trunk/PCbuild/make_versioninfo.vcproj ============================================================================== --- python/trunk/PCbuild/make_versioninfo.vcproj (original) +++ python/trunk/PCbuild/make_versioninfo.vcproj Tue Jan 3 08:38:51 2006 @@ -29,10 +29,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\make_versioninfo/make_versioninfo.pch" - AssemblerListingLocation=".\x86-temp-release\make_versioninfo/" - ObjectFile=".\x86-temp-release\make_versioninfo/" - ProgramDataBaseFileName=".\x86-temp-release\make_versioninfo/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -54,9 +50,7 @@ BaseAddress="0x1d000000" TargetMachine="1"/> + Name="VCMIDLTool"/> @@ -65,9 +59,7 @@ + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> @@ -131,10 +117,7 @@ + Name="VCResourceCompilerTool"/> - - - - - - Modified: python/trunk/PCbuild/pyexpat.vcproj ============================================================================== --- 
python/trunk/PCbuild/pyexpat.vcproj (original) +++ python/trunk/PCbuild/pyexpat.vcproj Tue Jan 3 08:38:51 2006 @@ -24,10 +24,6 @@ PreprocessorDefinitions="_DEBUG;HAVE_EXPAT_H;WIN32;_WINDOWS;XML_NS;XML_DTD;BYTEORDER=1234;XML_CONTEXT_BYTES=1024;XML_STATIC;HAVE_MEMMOVE" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\pyexpat/pyexpat.pch" - AssemblerListingLocation=".\x86-temp-debug\pyexpat/" - ObjectFile=".\x86-temp-debug\pyexpat/" - ProgramDataBaseFileName=".\x86-temp-debug\pyexpat/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -47,13 +43,7 @@ ImportLibrary=".\./pyexpat_d.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Modified: python/trunk/PCbuild/python.vcproj ============================================================================== --- python/trunk/PCbuild/python.vcproj (original) +++ python/trunk/PCbuild/python.vcproj Tue Jan 3 08:38:51 2006 @@ -28,10 +28,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\python/python.pch" - AssemblerListingLocation=".\x86-temp-release\python/" - ObjectFile=".\x86-temp-release\python/" - ProgramDataBaseFileName=".\x86-temp-release\python/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -51,9 +47,7 @@ BaseAddress="0x1d000000" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> - - - - - - - - - - - - - - - - - - Modified: python/trunk/PCbuild/pythoncore.vcproj ============================================================================== --- python/trunk/PCbuild/pythoncore.vcproj (original) +++ python/trunk/PCbuild/pythoncore.vcproj Tue Jan 3 08:38:51 2006 @@ -29,10 +29,6 @@ RuntimeLibrary="2" 
EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\pythoncore/pythoncore.pch" - AssemblerListingLocation=".\x86-temp-release\pythoncore/" - ObjectFile=".\x86-temp-release\pythoncore/" - ProgramDataBaseFileName=".\x86-temp-release\pythoncore/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -52,13 +48,7 @@ ImportLibrary=".\./python25.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> - - - - - - - - - @@ -343,84 +289,12 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - Modified: python/trunk/PCbuild/pythonw.vcproj ============================================================================== --- python/trunk/PCbuild/pythonw.vcproj (original) +++ python/trunk/PCbuild/pythonw.vcproj Tue Jan 3 08:38:51 2006 @@ -24,10 +24,6 @@ PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\pythonw/pythonw.pch" - AssemblerListingLocation=".\x86-temp-debug\pythonw/" - ObjectFile=".\x86-temp-debug\pythonw/" - ProgramDataBaseFileName=".\x86-temp-debug\pythonw/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -46,13 +42,7 @@ BaseAddress="0x1d000000" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> - - - - - - - - - - - - - - - - - - Modified: python/trunk/PCbuild/select.vcproj ============================================================================== --- python/trunk/PCbuild/select.vcproj (original) +++ python/trunk/PCbuild/select.vcproj Tue Jan 3 08:38:51 2006 @@ -27,10 +27,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\select/select.pch" - AssemblerListingLocation=".\x86-temp-release\select/" - ObjectFile=".\x86-temp-release\select/" - ProgramDataBaseFileName=".\x86-temp-release\select/" WarningLevel="3" SuppressStartupBanner="TRUE" 
DebugInformationFormat="3" @@ -51,13 +47,7 @@ ImportLibrary=".\./select.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/trunk/PCbuild/unicodedata.vcproj ============================================================================== --- python/trunk/PCbuild/unicodedata.vcproj (original) +++ python/trunk/PCbuild/unicodedata.vcproj Tue Jan 3 08:38:51 2006 @@ -28,10 +28,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\unicodedata/unicodedata.pch" - AssemblerListingLocation=".\x86-temp-release\unicodedata/" - ObjectFile=".\x86-temp-release\unicodedata/" - ProgramDataBaseFileName=".\x86-temp-release\unicodedata/" WarningLevel="3" SuppressStartupBanner="TRUE" CompileAs="0"/> @@ -47,13 +43,7 @@ ImportLibrary=".\./unicodedata.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/trunk/PCbuild/w9xpopen.vcproj ============================================================================== --- python/trunk/PCbuild/w9xpopen.vcproj (original) +++ python/trunk/PCbuild/w9xpopen.vcproj Tue Jan 3 08:38:51 2006 @@ -25,10 +25,6 @@ BasicRuntimeChecks="3" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\w9xpopen/w9xpopen.pch" - AssemblerListingLocation=".\x86-temp-debug\w9xpopen/" - ObjectFile=".\x86-temp-debug\w9xpopen/" - ProgramDataBaseFileName=".\x86-temp-debug\w9xpopen/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3"/> @@ -43,13 +39,7 @@ ProgramDatabaseFile=".\./w9xpopen_d.pdb" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + 
Name="VCResourceCompilerTool"/> - - - - - - Modified: python/trunk/PCbuild/winsound.vcproj ============================================================================== --- python/trunk/PCbuild/winsound.vcproj (original) +++ python/trunk/PCbuild/winsound.vcproj Tue Jan 3 08:38:51 2006 @@ -26,10 +26,6 @@ BasicRuntimeChecks="3" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\winsound/winsound.pch" - AssemblerListingLocation=".\x86-temp-debug\winsound/" - ObjectFile=".\x86-temp-debug\winsound/" - ProgramDataBaseFileName=".\x86-temp-debug\winsound/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -48,13 +44,7 @@ ImportLibrary=".\./winsound_d.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> @@ -114,13 +98,7 @@ ImportLibrary=".\./winsound.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - From python-checkins at python.org Tue Jan 3 08:42:15 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 08:42:15 +0100 (CET) Subject: [Python-checkins] commit of r41898 - python/trunk/Misc/NEWS Message-ID: <20060103074215.93C811E4002@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 08:42:14 2006 New Revision: 41898 Modified: python/trunk/Misc/NEWS Log: Mention that zlib is now builtin on Windows. Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Tue Jan 3 08:42:14 2006 @@ -631,6 +631,9 @@ Build ----- +- The sources of zlib are now part of the Python distribution (zlib 1.2.3). + The zlib module is now builtin on Windows. + - Use -xcode=pic32 for CCSHARED on Solaris with SunPro. 
- Bug #1189330: configure did not correctly determine the necessary From python-checkins at python.org Tue Jan 3 09:17:32 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 3 Jan 2006 09:17:32 +0100 (CET) Subject: [Python-checkins] commit of r41899 - python/branches/ssize_t/Objects/typeobject.c Message-ID: <20060103081732.AC7FE1E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 3 09:17:29 2006 New Revision: 41899 Modified: python/branches/ssize_t/Objects/typeobject.c Log: All tests pass for me now on amd64 Modified: python/branches/ssize_t/Objects/typeobject.c ============================================================================== --- python/branches/ssize_t/Objects/typeobject.c (original) +++ python/branches/ssize_t/Objects/typeobject.c Tue Jan 3 09:17:29 2006 @@ -3526,9 +3526,9 @@ } static PyObject * -wrap_intargfunc(PyObject *self, PyObject *args, void *wrapped) +wrap_ssizeargfunc(PyObject *self, PyObject *args, void *wrapped) { - intargfunc func = (intargfunc)wrapped; + ssizeargfunc func = (ssizeargfunc)wrapped; int i; if (!PyArg_ParseTuple(args, "i", &i)) @@ -3559,7 +3559,7 @@ static PyObject * wrap_sq_item(PyObject *self, PyObject *args, void *wrapped) { - intargfunc func = (intargfunc)wrapped; + ssizeargfunc func = (ssizeargfunc)wrapped; PyObject *arg; int i; @@ -3576,9 +3576,9 @@ } static PyObject * -wrap_intintargfunc(PyObject *self, PyObject *args, void *wrapped) +wrap_ssizessizeargfunc(PyObject *self, PyObject *args, void *wrapped) { - intintargfunc func = (intintargfunc)wrapped; + ssizessizeargfunc func = (ssizessizeargfunc)wrapped; int i, j; if (!PyArg_ParseTuple(args, "ii", &i, &j)) @@ -4963,13 +4963,13 @@ test_descr.notimplemented() */ SQSLOT("__add__", sq_concat, NULL, wrap_binaryfunc, "x.__add__(y) <==> x+y"), - SQSLOT("__mul__", sq_repeat, NULL, wrap_intargfunc, + SQSLOT("__mul__", sq_repeat, NULL, wrap_ssizeargfunc, "x.__mul__(n) <==> x*n"), - SQSLOT("__rmul__", sq_repeat, NULL, wrap_intargfunc, + SQSLOT("__rmul__", 
sq_repeat, NULL, wrap_ssizeargfunc, "x.__rmul__(n) <==> n*x"), SQSLOT("__getitem__", sq_item, slot_sq_item, wrap_sq_item, "x.__getitem__(y) <==> x[y]"), - SQSLOT("__getslice__", sq_slice, slot_sq_slice, wrap_intintargfunc, + SQSLOT("__getslice__", sq_slice, slot_sq_slice, wrap_ssizessizeargfunc, "x.__getslice__(i, j) <==> x[i:j]\n\ \n\ Use of negative indices is not supported."), @@ -4991,7 +4991,7 @@ SQSLOT("__iadd__", sq_inplace_concat, NULL, wrap_binaryfunc, "x.__iadd__(y) <==> x+=y"), SQSLOT("__imul__", sq_inplace_repeat, NULL, - wrap_intargfunc, "x.__imul__(y) <==> x*=y"), + wrap_ssizeargfunc, "x.__imul__(y) <==> x*=y"), MPSLOT("__len__", mp_length, slot_mp_length, wrap_inquiry, "x.__len__() <==> len(x)"), From python-checkins at python.org Tue Jan 3 09:20:36 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 3 Jan 2006 09:20:36 +0100 (CET) Subject: [Python-checkins] commit of r41900 - python/branches/ssize_t/Include/object.h Message-ID: <20060103082036.231A21E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 3 09:20:35 2006 New Revision: 41900 Modified: python/branches/ssize_t/Include/object.h Log: I'm not sure if this is technically correct. intarg types are no longer used anywhere in the core with the exception of ./RISCOS/Modules swimodule.c and drawfmodule.c which have not been converted yet AFAIK. 
Modified: python/branches/ssize_t/Include/object.h ============================================================================== --- python/branches/ssize_t/Include/object.h (original) +++ python/branches/ssize_t/Include/object.h Tue Jan 3 09:20:35 2006 @@ -129,8 +129,8 @@ typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); typedef int (*inquiry)(PyObject *); typedef int (*coercion)(PyObject **, PyObject **); -typedef PyObject *(*intargfunc)(PyObject *, int); -typedef PyObject *(*intintargfunc)(PyObject *, int, int); +typedef PyObject *(*intargfunc)(PyObject *, int) Py_DEPRECATED(2.5); +typedef PyObject *(*intintargfunc)(PyObject *, int, int) Py_DEPRECATED(2.5); typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t); typedef PyObject *(*ssizessizeargfunc)(PyObject *, Py_ssize_t, Py_ssize_t); typedef int(*intobjargproc)(PyObject *, int, PyObject *); From python-checkins at python.org Tue Jan 3 09:22:57 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 3 Jan 2006 09:22:57 +0100 (CET) Subject: [Python-checkins] commit of r41901 - peps/trunk/pep-3000.txt Message-ID: <20060103082257.CDFEC1E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 3 09:22:57 2006 New Revision: 41901 Modified: peps/trunk/pep-3000.txt Log: Found a few more deprecated functions and types Modified: peps/trunk/pep-3000.txt ============================================================================== --- peps/trunk/pep-3000.txt (original) +++ peps/trunk/pep-3000.txt Tue Jan 3 09:22:57 2006 @@ -83,7 +83,9 @@ PyFloat_AsString, PyFloat_AsReprString, PyFloat_AsStringEx, PySequence_In, PyEval_EvalFrame, PyEval_CallObject, _PyObject_Del, _PyObject_GC_Del, _PyObject_GC_Track, _PyObject_GC_UnTrack + PyString_AsEncodedString, PyString_AsDecodedString + typedefs: intargfunc, intintargfunc Built-in Namespace ================== From python-checkins at python.org Tue Jan 3 09:54:55 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 
09:54:55 +0100 (CET) Subject: [Python-checkins] commit of r41902 - python/branches/ssize_t/RISCOS/Modules/drawfmodule.c python/branches/ssize_t/RISCOS/Modules/swimodule.c Message-ID: <20060103085455.990851E4002@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 09:54:54 2006 New Revision: 41902 Modified: python/branches/ssize_t/RISCOS/Modules/drawfmodule.c python/branches/ssize_t/RISCOS/Modules/swimodule.c Log: Port to ssize_t, change is untested. Modified: python/branches/ssize_t/RISCOS/Modules/drawfmodule.c ============================================================================== --- python/branches/ssize_t/RISCOS/Modules/drawfmodule.c (original) +++ python/branches/ssize_t/RISCOS/Modules/drawfmodule.c Tue Jan 3 09:54:54 2006 @@ -66,7 +66,7 @@ if ((char*)d==end) pd->nobjs=k; } -static drawfile_object *findobj(PyDrawFObject *pd,int n) +static drawfile_object *findobj(PyDrawFObject *pd,Py_ssize_t n) { drawfile_diagram *dd=pd->drawf; drawfile_object *d=dd->objects; for(;n>0;n--) d=NEXT(d); @@ -520,14 +520,14 @@ return (PyObject*)p; } -static PyObject *drawf_repeat(PyDrawFObject *b,int i) +static PyObject *drawf_repeat(PyDrawFObject *b,Py_ssize_t i) { PyErr_SetString(PyExc_IndexError,"drawf repetition not implemented"); return NULL; } -static PyObject *drawf_item(PyDrawFObject *b,int i) +static PyObject *drawf_item(PyDrawFObject *b,Py_ssize_t i) { PyDrawFObject *c; - int size; + Py_ssize_t size; drawfile_diagram *dd; drawfile_object *d; if(i<0||i>=b->nobjs) @@ -546,9 +546,9 @@ return (PyObject*)c; } -static PyObject *drawf_slice(PyDrawFObject *b,int i,int j) +static PyObject *drawf_slice(PyDrawFObject *b,Py_ssize_t i,Py_ssize_t j) { PyDrawFObject *c; - int size,n; + Py_ssize_t size,n; drawfile_diagram *dd; drawfile_object *d; if(i<0||j>b->nobjs) @@ -570,7 +570,7 @@ return (PyObject*)c; } -static int drawf_ass_item(PyDrawFObject *b,int i,PyObject *v) +static int drawf_ass_item(PyDrawFObject *b,Py_ssize_t i,PyObject *v) { 
PyErr_SetString(PyExc_IndexError,"drawf ass not implemented"); return NULL; } @@ -587,7 +587,7 @@ } */ -static int drawf_ass_slice(PyDrawFObject *b,int i,int j,PyObject *v) +static int drawf_ass_slice(PyDrawFObject *b,Py_ssize_t i,Py_ssize_t j,PyObject *v) { PyErr_SetString(PyExc_IndexError,"drawf ass_slice not implemented"); return NULL; } @@ -595,11 +595,11 @@ static PySequenceMethods drawf_as_sequence= { (inquiry)drawf_len, (binaryfunc)drawf_concat, - (intargfunc)drawf_repeat, - (intargfunc)drawf_item, - (intintargfunc)drawf_slice, - (intobjargproc)drawf_ass_item, - (intintobjargproc)drawf_ass_slice, + (ssizeargfunc)drawf_repeat, + (ssizeargfunc)drawf_item, + (ssizessizeargfunc)drawf_slice, + (ssizeobjargproc)drawf_ass_item, + (ssizessizeobjargproc)drawf_ass_slice, }; static PyObject *PyDrawF_GetAttr(PyDrawFObject *s,char *name) Modified: python/branches/ssize_t/RISCOS/Modules/swimodule.c ============================================================================== --- python/branches/ssize_t/RISCOS/Modules/swimodule.c (original) +++ python/branches/ssize_t/RISCOS/Modules/swimodule.c Tue Jan 3 09:54:54 2006 @@ -215,12 +215,12 @@ return NULL; } -static PyObject *block_repeat(PyBlockObject *b,int i) +static PyObject *block_repeat(PyBlockObject *b,Py_ssize_t i) { PyErr_SetString(PyExc_IndexError,"block repetition not implemented"); return NULL; } -static PyObject *block_item(PyBlockObject *b,int i) +static PyObject *block_item(PyBlockObject *b,Py_ssize_t i) { if(i<0||4*i>=b->length) { PyErr_SetString(PyExc_IndexError,"block index out of range"); return NULL; @@ -228,8 +228,8 @@ return PyInt_FromLong(((long*)(b->block))[i]); } -static PyObject *block_slice(PyBlockObject *b,int i,int j) -{ int n,k; +static PyObject *block_slice(PyBlockObject *b,Py_ssize_t i,Py_ssize_t j) +{ Py_ssize_t n,k; long *p=b->block; PyObject *result; if(j>b->length/4) j=b->length/4; @@ -239,11 +239,11 @@ } n=j-i; result=PyList_New(n); - for(k=0;k=b->length/4) { 
PyErr_SetString(PyExc_IndexError,"block index out of range"); return -1; @@ -256,8 +256,8 @@ return 0; } -static int block_ass_slice(PyBlockObject *b,int i,int j,PyObject *v) -{ int n,k; +static int block_ass_slice(PyBlockObject *b,Py_ssize_t i,Py_ssize_t j,PyObject *v) +{ Py_ssize_t n,k; long *p=b->block; if(j>b->length/4) j=b->length/4; if(i<0||i>j) @@ -281,11 +281,11 @@ static PySequenceMethods block_as_sequence= { (inquiry)block_len, /*sq_length*/ (binaryfunc)block_concat, /*sq_concat*/ - (intargfunc)block_repeat, /*sq_repeat*/ - (intargfunc)block_item, /*sq_item*/ - (intintargfunc)block_slice, /*sq_slice*/ - (intobjargproc)block_ass_item, /*sq_ass_item*/ - (intintobjargproc)block_ass_slice, /*sq_ass_slice*/ + (ssizeargfunc)block_repeat, /*sq_repeat*/ + (ssizeargfunc)block_item, /*sq_item*/ + (ssizessizeargfunc)block_slice, /*sq_slice*/ + (ssizeobjargproc)block_ass_item, /*sq_ass_item*/ + (ssizessizeobjargproc)block_ass_slice, /*sq_ass_slice*/ }; static PyObject *PyBlock_GetAttr(PyBlockObject *s,char *name) From python-checkins at python.org Tue Jan 3 10:09:24 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 10:09:24 +0100 (CET) Subject: [Python-checkins] commit of r41903 - python/branches/ssize_t/Objects/typeobject.c Message-ID: <20060103090924.C0BCC1E4009@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 10:09:23 2006 New Revision: 41903 Modified: python/branches/ssize_t/Objects/typeobject.c Log: Update wrappers for ssize_t. 
Modified: python/branches/ssize_t/Objects/typeobject.c ============================================================================== --- python/branches/ssize_t/Objects/typeobject.c (original) +++ python/branches/ssize_t/Objects/typeobject.c Tue Jan 3 10:09:23 2006 @@ -3529,19 +3529,19 @@ wrap_ssizeargfunc(PyObject *self, PyObject *args, void *wrapped) { ssizeargfunc func = (ssizeargfunc)wrapped; - int i; + Py_ssize_t i; - if (!PyArg_ParseTuple(args, "i", &i)) + if (!PyArg_ParseTuple(args, "n", &i)) return NULL; return (*func)(self, i); } -static int +static Py_ssize_t getindex(PyObject *self, PyObject *arg) { - int i; + Py_ssize_t i; - i = PyInt_AsLong(arg); + i = PyInt_AsSsize_t(arg); if (i == -1 && PyErr_Occurred()) return -1; if (i < 0) { @@ -3579,9 +3579,9 @@ wrap_ssizessizeargfunc(PyObject *self, PyObject *args, void *wrapped) { ssizessizeargfunc func = (ssizessizeargfunc)wrapped; - int i, j; + Py_ssize_t i, j; - if (!PyArg_ParseTuple(args, "ii", &i, &j)) + if (!PyArg_ParseTuple(args, "nn", &i, &j)) return NULL; return (*func)(self, i, j); } @@ -3589,8 +3589,9 @@ static PyObject * wrap_sq_setitem(PyObject *self, PyObject *args, void *wrapped) { - intobjargproc func = (intobjargproc)wrapped; - int i, res; + ssizeobjargproc func = (ssizeobjargproc)wrapped; + Py_ssize_t i; + int res; PyObject *arg, *value; if (!PyArg_UnpackTuple(args, "", 2, 2, &arg, &value)) @@ -3608,8 +3609,9 @@ static PyObject * wrap_sq_delitem(PyObject *self, PyObject *args, void *wrapped) { - intobjargproc func = (intobjargproc)wrapped; - int i, res; + ssizeobjargproc func = (ssizeobjargproc)wrapped; + Py_ssize_t i; + int res; PyObject *arg; if (!check_num_args(args, 1)) @@ -3626,13 +3628,14 @@ } static PyObject * -wrap_intintobjargproc(PyObject *self, PyObject *args, void *wrapped) +wrap_ssizessizeobjargproc(PyObject *self, PyObject *args, void *wrapped) { - intintobjargproc func = (intintobjargproc)wrapped; - int i, j, res; + ssizessizeobjargproc func = (ssizessizeobjargproc)wrapped; + 
Py_ssize_t i, j; + int res; PyObject *value; - if (!PyArg_ParseTuple(args, "iiO", &i, &j, &value)) + if (!PyArg_ParseTuple(args, "nnO", &i, &j, &value)) return NULL; res = (*func)(self, i, j, value); if (res == -1 && PyErr_Occurred()) @@ -3644,10 +3647,11 @@ static PyObject * wrap_delslice(PyObject *self, PyObject *args, void *wrapped) { - intintobjargproc func = (intintobjargproc)wrapped; - int i, j, res; + ssizessizeobjargproc func = (ssizessizeobjargproc)wrapped; + Py_ssize_t i, j; + int res; - if (!PyArg_ParseTuple(args, "ii", &i, &j)) + if (!PyArg_ParseTuple(args, "nn", &i, &j)) return NULL; res = (*func)(self, i, j, NULL); if (res == -1 && PyErr_Occurred()) @@ -4978,7 +4982,7 @@ SQSLOT("__delitem__", sq_ass_item, slot_sq_ass_item, wrap_sq_delitem, "x.__delitem__(y) <==> del x[y]"), SQSLOT("__setslice__", sq_ass_slice, slot_sq_ass_slice, - wrap_intintobjargproc, + wrap_ssizessizeobjargproc, "x.__setslice__(i, j, y) <==> x[i:j]=y\n\ \n\ Use of negative indices is not supported."), From python-checkins at python.org Tue Jan 3 10:49:20 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 10:49:20 +0100 (CET) Subject: [Python-checkins] commit of r41904 - in python/branches/ssize_t: Include/abstract.h Include/object.h Modules/_bsddb.c Modules/arraymodule.c Modules/bsddbmodule.c Modules/collectionsmodule.c Modules/dbmmodule.c Modules/gdbmmodule.c Modules/mmapmodule.c Objects/abstract.c Objects/bufferobject.c Objects/classobject.c Objects/descrobject.c Objects/dictobject.c Objects/listobject.c Objects/object.c Objects/rangeobject.c Objects/setobject.c Objects/stringobject.c Objects/tupleobject.c Objects/typeobject.c Objects/unicodeobject.c Objects/weakrefobject.c Message-ID: <20060103094920.B00631E4002@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 10:49:11 2006 New Revision: 41904 Modified: python/branches/ssize_t/Include/abstract.h python/branches/ssize_t/Include/object.h python/branches/ssize_t/Modules/_bsddb.c 
python/branches/ssize_t/Modules/arraymodule.c python/branches/ssize_t/Modules/bsddbmodule.c python/branches/ssize_t/Modules/collectionsmodule.c python/branches/ssize_t/Modules/dbmmodule.c python/branches/ssize_t/Modules/gdbmmodule.c python/branches/ssize_t/Modules/mmapmodule.c python/branches/ssize_t/Objects/abstract.c python/branches/ssize_t/Objects/bufferobject.c python/branches/ssize_t/Objects/classobject.c python/branches/ssize_t/Objects/descrobject.c python/branches/ssize_t/Objects/dictobject.c python/branches/ssize_t/Objects/listobject.c python/branches/ssize_t/Objects/object.c python/branches/ssize_t/Objects/rangeobject.c python/branches/ssize_t/Objects/setobject.c python/branches/ssize_t/Objects/stringobject.c python/branches/ssize_t/Objects/tupleobject.c python/branches/ssize_t/Objects/typeobject.c python/branches/ssize_t/Objects/unicodeobject.c python/branches/ssize_t/Objects/weakrefobject.c Log: Make Length/Size return Py_ssize_t throughout. Modified: python/branches/ssize_t/Include/abstract.h ============================================================================== --- python/branches/ssize_t/Include/abstract.h (original) +++ python/branches/ssize_t/Include/abstract.h Tue Jan 3 10:49:11 2006 @@ -407,7 +407,7 @@ equivalent to the Python expression: type(o). */ - PyAPI_FUNC(int) PyObject_Size(PyObject *o); + PyAPI_FUNC(Py_ssize_t) PyObject_Size(PyObject *o); /* Return the size of object o. If the object, o, provides @@ -419,7 +419,7 @@ /* For DLL compatibility */ #undef PyObject_Length - PyAPI_FUNC(int) PyObject_Length(PyObject *o); + PyAPI_FUNC(Py_ssize_t) PyObject_Length(PyObject *o); #define PyObject_Length PyObject_Size PyAPI_FUNC(int) _PyObject_LengthCue(PyObject *o); @@ -906,7 +906,7 @@ */ - PyAPI_FUNC(int) PySequence_Size(PyObject *o); + PyAPI_FUNC(Py_ssize_t) PySequence_Size(PyObject *o); /* Return the size of sequence object o, or -1 on failure. 
@@ -915,7 +915,7 @@ /* For DLL compatibility */ #undef PySequence_Length - PyAPI_FUNC(int) PySequence_Length(PyObject *o); + PyAPI_FUNC(Py_ssize_t) PySequence_Length(PyObject *o); #define PySequence_Length PySequence_Size @@ -1120,7 +1120,7 @@ This function always succeeds. */ - PyAPI_FUNC(int) PyMapping_Size(PyObject *o); + PyAPI_FUNC(Py_ssize_t) PyMapping_Size(PyObject *o); /* Returns the number of keys in object o on success, and -1 on @@ -1130,7 +1130,7 @@ /* For DLL compatibility */ #undef PyMapping_Length - PyAPI_FUNC(int) PyMapping_Length(PyObject *o); + PyAPI_FUNC(Py_ssize_t) PyMapping_Length(PyObject *o); #define PyMapping_Length PyMapping_Size Modified: python/branches/ssize_t/Include/object.h ============================================================================== --- python/branches/ssize_t/Include/object.h (original) +++ python/branches/ssize_t/Include/object.h Tue Jan 3 10:49:11 2006 @@ -128,6 +128,7 @@ typedef PyObject * (*binaryfunc)(PyObject *, PyObject *); typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); typedef int (*inquiry)(PyObject *); +typedef Py_ssize_t (*lenfunc)(PyObject *); typedef int (*coercion)(PyObject **, PyObject **); typedef PyObject *(*intargfunc)(PyObject *, int) Py_DEPRECATED(2.5); typedef PyObject *(*intintargfunc)(PyObject *, int, int) Py_DEPRECATED(2.5); @@ -200,7 +201,7 @@ } PyNumberMethods; typedef struct { - inquiry sq_length; + lenfunc sq_length; binaryfunc sq_concat; ssizeargfunc sq_repeat; ssizeargfunc sq_item; @@ -214,7 +215,7 @@ } PySequenceMethods; typedef struct { - inquiry mp_length; + lenfunc mp_length; binaryfunc mp_subscript; objobjargproc mp_ass_subscript; } PyMappingMethods; Modified: python/branches/ssize_t/Modules/_bsddb.c ============================================================================== --- python/branches/ssize_t/Modules/_bsddb.c (original) +++ python/branches/ssize_t/Modules/_bsddb.c Tue Jan 3 10:49:11 2006 @@ -2603,7 +2603,7 @@ 
/*-------------------------------------------------------------- */ /* Mapping and Dictionary-like access routines */ -int DB_length(DBObject* self) +Py_ssize_t DB_length(DBObject* self) { int err; long size = 0; @@ -4641,7 +4641,7 @@ static PyMappingMethods DB_mapping = { - (inquiry)DB_length, /*mp_length*/ + (lenfunc)DB_length, /*mp_length*/ (binaryfunc)DB_subscript, /*mp_subscript*/ (objobjargproc)DB_ass_sub, /*mp_ass_subscript*/ }; Modified: python/branches/ssize_t/Modules/arraymodule.c ============================================================================== --- python/branches/ssize_t/Modules/arraymodule.c (original) +++ python/branches/ssize_t/Modules/arraymodule.c Tue Jan 3 10:49:11 2006 @@ -1742,7 +1742,7 @@ } static PyMappingMethods array_as_mapping = { - (inquiry)array_length, + (lenfunc)array_length, (binaryfunc)array_subscr, (objobjargproc)array_ass_subscr }; @@ -1780,7 +1780,7 @@ } static PySequenceMethods array_as_sequence = { - (inquiry)array_length, /*sq_length*/ + (lenfunc)array_length, /*sq_length*/ (binaryfunc)array_concat, /*sq_concat*/ (ssizeargfunc)array_repeat, /*sq_repeat*/ (ssizeargfunc)array_item, /*sq_item*/ Modified: python/branches/ssize_t/Modules/bsddbmodule.c ============================================================================== --- python/branches/ssize_t/Modules/bsddbmodule.c (original) +++ python/branches/ssize_t/Modules/bsddbmodule.c Tue Jan 3 10:49:11 2006 @@ -240,7 +240,7 @@ #define BSDDB_END_SAVE(_dp) Py_END_ALLOW_THREADS #endif -static int +static Py_ssize_t bsddb_length(bsddbobject *dp) { check_bsddbobject_open(dp, -1); @@ -374,7 +374,7 @@ } static PyMappingMethods bsddb_as_mapping = { - (inquiry)bsddb_length, /*mp_length*/ + (lenfunc)bsddb_length, /*mp_length*/ (binaryfunc)bsddb_subscript, /*mp_subscript*/ (objobjargproc)bsddb_ass_sub, /*mp_ass_subscript*/ }; Modified: python/branches/ssize_t/Modules/collectionsmodule.c ============================================================================== --- 
python/branches/ssize_t/Modules/collectionsmodule.c (original) +++ python/branches/ssize_t/Modules/collectionsmodule.c Tue Jan 3 10:49:11 2006 @@ -365,7 +365,7 @@ PyDoc_STRVAR(rotate_doc, "Rotate the deque n steps to the right (default n=1). If n is negative, rotates left."); -static int +static Py_ssize_t deque_len(dequeobject *deque) { return deque->len; @@ -776,7 +776,7 @@ } static PySequenceMethods deque_as_sequence = { - (inquiry)deque_len, /* sq_length */ + (lenfunc)deque_len, /* sq_length */ 0, /* sq_concat */ 0, /* sq_repeat */ (ssizeargfunc)deque_item, /* sq_item */ Modified: python/branches/ssize_t/Modules/dbmmodule.c ============================================================================== --- python/branches/ssize_t/Modules/dbmmodule.c (original) +++ python/branches/ssize_t/Modules/dbmmodule.c Tue Jan 3 10:49:11 2006 @@ -70,7 +70,7 @@ PyObject_Del(dp); } -static int +static Py_ssize_t dbm_length(dbmobject *dp) { if (dp->di_dbm == NULL) { @@ -162,7 +162,7 @@ } static PyMappingMethods dbm_as_mapping = { - (inquiry)dbm_length, /*mp_length*/ + (lenfunc)dbm_length, /*mp_length*/ (binaryfunc)dbm_subscript, /*mp_subscript*/ (objobjargproc)dbm_ass_sub, /*mp_ass_subscript*/ }; Modified: python/branches/ssize_t/Modules/gdbmmodule.c ============================================================================== --- python/branches/ssize_t/Modules/gdbmmodule.c (original) +++ python/branches/ssize_t/Modules/gdbmmodule.c Tue Jan 3 10:49:11 2006 @@ -86,7 +86,7 @@ PyObject_Del(dp); } -static int +static Py_ssize_t dbm_length(dbmobject *dp) { if (dp->di_dbm == NULL) { @@ -178,7 +178,7 @@ } static PyMappingMethods dbm_as_mapping = { - (inquiry)dbm_length, /*mp_length*/ + (lenfunc)dbm_length, /*mp_length*/ (binaryfunc)dbm_subscript, /*mp_subscript*/ (objobjargproc)dbm_ass_sub, /*mp_ass_subscript*/ }; Modified: python/branches/ssize_t/Modules/mmapmodule.c ============================================================================== --- 
python/branches/ssize_t/Modules/mmapmodule.c (original) +++ python/branches/ssize_t/Modules/mmapmodule.c Tue Jan 3 10:49:11 2006 @@ -633,7 +633,7 @@ return Py_FindMethod (mmap_object_methods, (PyObject *)self, name); } -static int +static Py_ssize_t mmap_length(mmap_object *self) { CHECK_VALID(-1); @@ -754,7 +754,7 @@ } static PySequenceMethods mmap_as_sequence = { - (inquiry)mmap_length, /*sq_length*/ + (lenfunc)mmap_length, /*sq_length*/ (binaryfunc)mmap_concat, /*sq_concat*/ (ssizeargfunc)mmap_repeat, /*sq_repeat*/ (ssizeargfunc)mmap_item, /*sq_item*/ Modified: python/branches/ssize_t/Objects/abstract.c ============================================================================== --- python/branches/ssize_t/Objects/abstract.c (original) +++ python/branches/ssize_t/Objects/abstract.c Tue Jan 3 10:49:11 2006 @@ -56,7 +56,7 @@ return v; } -int +Py_ssize_t PyObject_Size(PyObject *o) { PySequenceMethods *m; @@ -1236,7 +1236,7 @@ } PyObject * -PySequence_GetItem(PyObject *s, Py_ssize_t i) /* XXX negative values */ +PySequence_GetItem(PyObject *s, Py_ssize_t i) { PySequenceMethods *m; @@ -1247,7 +1247,7 @@ if (m && m->sq_item) { if (i < 0) { if (m->sq_length) { - int l = (*m->sq_length)(s); + Py_ssize_t l = (*m->sq_length)(s); if (l < 0) return NULL; i += l; @@ -1289,7 +1289,7 @@ if (m && m->sq_slice) { if (i1 < 0 || i2 < 0) { if (m->sq_length) { - int l = (*m->sq_length)(s); + Py_ssize_t l = (*m->sq_length)(s); if (l < 0) return NULL; if (i1 < 0) @@ -1326,7 +1326,7 @@ if (m && m->sq_ass_item) { if (i < 0) { if (m->sq_length) { - int l = (*m->sq_length)(s); + Py_ssize_t l = (*m->sq_length)(s); if (l < 0) return -1; i += l; @@ -1353,7 +1353,7 @@ if (m && m->sq_ass_item) { if (i < 0) { if (m->sq_length) { - int l = (*m->sq_length)(s); + Py_ssize_t l = (*m->sq_length)(s); if (l < 0) return -1; i += l; @@ -1381,7 +1381,7 @@ if (m && m->sq_ass_slice) { if (i1 < 0 || i2 < 0) { if (m->sq_length) { - int l = (*m->sq_length)(s); + Py_ssize_t l = (*m->sq_length)(s); if (l < 0) 
return -1; if (i1 < 0) @@ -1419,7 +1419,7 @@ if (m && m->sq_ass_slice) { if (i1 < 0 || i2 < 0) { if (m->sq_length) { - int l = (*m->sq_length)(s); + Py_ssize_t l = (*m->sq_length)(s); if (l < 0) return -1; if (i1 < 0) @@ -1708,7 +1708,7 @@ o->ob_type->tp_as_sequence->sq_slice); } -int +Py_ssize_t PyMapping_Size(PyObject *o) { PyMappingMethods *m; @@ -1727,7 +1727,7 @@ } #undef PyMapping_Length -int +Py_ssize_t PyMapping_Length(PyObject *o) { return PyMapping_Size(o); Modified: python/branches/ssize_t/Objects/bufferobject.c ============================================================================== --- python/branches/ssize_t/Objects/bufferobject.c (original) +++ python/branches/ssize_t/Objects/bufferobject.c Tue Jan 3 10:49:11 2006 @@ -308,7 +308,7 @@ /* Sequence methods */ -static int +static Py_ssize_t buffer_length(PyBufferObject *self) { void *ptr; @@ -596,7 +596,7 @@ static PySequenceMethods buffer_as_sequence = { - (inquiry)buffer_length, /*sq_length*/ + (lenfunc)buffer_length, /*sq_length*/ (binaryfunc)buffer_concat, /*sq_concat*/ (ssizeargfunc)buffer_repeat, /*sq_repeat*/ (ssizeargfunc)buffer_item, /*sq_item*/ Modified: python/branches/ssize_t/Objects/classobject.c ============================================================================== --- python/branches/ssize_t/Objects/classobject.c (original) +++ python/branches/ssize_t/Objects/classobject.c Tue Jan 3 10:49:11 2006 @@ -996,12 +996,12 @@ static PyObject *getitemstr, *setitemstr, *delitemstr, *lenstr; static PyObject *iterstr, *nextstr; -static int +static Py_ssize_t instance_length(PyInstanceObject *inst) { PyObject *func; PyObject *res; - int outcome; + Py_ssize_t outcome; if (lenstr == NULL) lenstr = PyString_InternFromString("__len__"); @@ -1013,9 +1013,13 @@ if (res == NULL) return -1; if (PyInt_Check(res)) { - long temp = PyInt_AsLong(res); - outcome = (int)temp; -#if SIZEOF_INT < SIZEOF_LONG + Py_ssize_t temp = PyInt_AsSsize_t(res); + if (temp == -1 && PyErr_Occurred()) { + Py_DECREF(res); 
+ return -1; + } + outcome = (Py_ssize_t)temp; +#if SIZEOF_SIZE_T < SIZEOF_LONG /* Overflow check -- range of PyInt is more than C int */ if (outcome != temp) { PyErr_SetString(PyExc_OverflowError, @@ -1097,7 +1101,7 @@ } static PyMappingMethods instance_as_mapping = { - (inquiry)instance_length, /* mp_length */ + (lenfunc)instance_length, /* mp_length */ (binaryfunc)instance_subscript, /* mp_subscript */ (objobjargproc)instance_ass_subscript, /* mp_ass_subscript */ }; @@ -1322,7 +1326,7 @@ static PySequenceMethods instance_as_sequence = { - (inquiry)instance_length, /* sq_length */ + (lenfunc)instance_length, /* sq_length */ 0, /* sq_concat */ 0, /* sq_repeat */ (ssizeargfunc)instance_item, /* sq_item */ Modified: python/branches/ssize_t/Objects/descrobject.c ============================================================================== --- python/branches/ssize_t/Objects/descrobject.c (original) +++ python/branches/ssize_t/Objects/descrobject.c Tue Jan 3 10:49:11 2006 @@ -669,7 +669,7 @@ PyObject *dict; } proxyobject; -static int +static Py_ssize_t proxy_len(proxyobject *pp) { return PyObject_Size(pp->dict); @@ -682,7 +682,7 @@ } static PyMappingMethods proxy_as_mapping = { - (inquiry)proxy_len, /* mp_length */ + (lenfunc)proxy_len, /* mp_length */ (binaryfunc)proxy_getitem, /* mp_subscript */ 0, /* mp_ass_subscript */ }; Modified: python/branches/ssize_t/Objects/dictobject.c ============================================================================== --- python/branches/ssize_t/Objects/dictobject.c (original) +++ python/branches/ssize_t/Objects/dictobject.c Tue Jan 3 10:49:11 2006 @@ -862,7 +862,7 @@ return result; } -static int +static Py_ssize_t dict_length(dictobject *mp) { return mp->ma_used; @@ -898,7 +898,7 @@ } static PyMappingMethods dict_as_mapping = { - (inquiry)dict_length, /*mp_length*/ + (lenfunc)dict_length, /*mp_length*/ (binaryfunc)dict_subscript, /*mp_subscript*/ (objobjargproc)dict_ass_sub, /*mp_ass_subscript*/ }; Modified: 
python/branches/ssize_t/Objects/listobject.c ============================================================================== --- python/branches/ssize_t/Objects/listobject.c (original) +++ python/branches/ssize_t/Objects/listobject.c Tue Jan 3 10:49:11 2006 @@ -363,7 +363,7 @@ return result; } -static int +static Py_ssize_t list_length(PyListObject *a) { return a->ob_size; @@ -2434,7 +2434,7 @@ }; static PySequenceMethods list_as_sequence = { - (inquiry)list_length, /* sq_length */ + (lenfunc)list_length, /* sq_length */ (binaryfunc)list_concat, /* sq_concat */ (ssizeargfunc)list_repeat, /* sq_repeat */ (ssizeargfunc)list_item, /* sq_item */ @@ -2641,7 +2641,7 @@ } static PyMappingMethods list_as_mapping = { - (inquiry)list_length, + (lenfunc)list_length, (binaryfunc)list_subscript, (objobjargproc)list_ass_subscript }; @@ -2876,7 +2876,7 @@ return NULL; } -static int +static Py_ssize_t listreviter_len(listreviterobject *it) { Py_ssize_t len = it->it_index + 1; @@ -2886,7 +2886,7 @@ } static PySequenceMethods listreviter_as_sequence = { - (inquiry)listreviter_len, /* sq_length */ + (lenfunc)listreviter_len, /* sq_length */ 0, /* sq_concat */ }; Modified: python/branches/ssize_t/Objects/object.c ============================================================================== --- python/branches/ssize_t/Objects/object.c (original) +++ python/branches/ssize_t/Objects/object.c Tue Jan 3 10:49:11 2006 @@ -1414,7 +1414,7 @@ int PyObject_IsTrue(PyObject *v) { - int res; + Py_ssize_t res; if (v == Py_True) return 1; if (v == Py_False) Modified: python/branches/ssize_t/Objects/rangeobject.c ============================================================================== --- python/branches/ssize_t/Objects/rangeobject.c (original) +++ python/branches/ssize_t/Objects/rangeobject.c Tue Jan 3 10:49:11 2006 @@ -101,17 +101,17 @@ return PyInt_FromLong(r->start + (i % r->len) * r->step); } -static int +static Py_ssize_t range_length(rangeobject *r) { -#if LONG_MAX != INT_MAX +#if 
LONG_MAX != INT_MAX /* XXX ssize_t_max */ if (r->len > INT_MAX) { PyErr_SetString(PyExc_ValueError, "xrange object size cannot be reported"); return -1; } #endif - return (int)(r->len); + return (Py_ssize_t)(r->len); } static PyObject * @@ -137,7 +137,7 @@ } static PySequenceMethods range_as_sequence = { - (inquiry)range_length, /* sq_length */ + (lenfunc)range_length, /* sq_length */ 0, /* sq_concat */ 0, /* sq_repeat */ (ssizeargfunc)range_item, /* sq_item */ Modified: python/branches/ssize_t/Objects/setobject.c ============================================================================== --- python/branches/ssize_t/Objects/setobject.c (original) +++ python/branches/ssize_t/Objects/setobject.c Tue Jan 3 10:49:11 2006 @@ -551,7 +551,7 @@ return result; } -static int +static Py_ssize_t set_len(PyObject *so) { return ((PySetObject *)so)->used; @@ -1687,7 +1687,7 @@ } static PySequenceMethods set_as_sequence = { - (inquiry)set_len, /* sq_length */ + (lenfunc)set_len, /* sq_length */ 0, /* sq_concat */ 0, /* sq_repeat */ 0, /* sq_item */ Modified: python/branches/ssize_t/Objects/stringobject.c ============================================================================== --- python/branches/ssize_t/Objects/stringobject.c (original) +++ python/branches/ssize_t/Objects/stringobject.c Tue Jan 3 10:49:11 2006 @@ -877,7 +877,7 @@ } } -static int +static Py_ssize_t string_length(PyStringObject *a) { return a->ob_size; @@ -1257,7 +1257,7 @@ } static PySequenceMethods string_as_sequence = { - (inquiry)string_length, /*sq_length*/ + (lenfunc)string_length, /*sq_length*/ (binaryfunc)string_concat, /*sq_concat*/ (ssizeargfunc)string_repeat, /*sq_repeat*/ (ssizeargfunc)string_item, /*sq_item*/ @@ -1268,7 +1268,7 @@ }; static PyMappingMethods string_as_mapping = { - (inquiry)string_length, + (lenfunc)string_length, (binaryfunc)string_subscript, 0, }; Modified: python/branches/ssize_t/Objects/tupleobject.c 
============================================================================== --- python/branches/ssize_t/Objects/tupleobject.c (original) +++ python/branches/ssize_t/Objects/tupleobject.c Tue Jan 3 10:49:11 2006 @@ -572,7 +572,7 @@ "If the argument is a tuple, the return value is the same object."); static PySequenceMethods tuple_as_sequence = { - (inquiry)tuplelength, /* sq_length */ + (lenfunc)tuplelength, /* sq_length */ (binaryfunc)tupleconcat, /* sq_concat */ (ssizeargfunc)tuplerepeat, /* sq_repeat */ (ssizeargfunc)tupleitem, /* sq_item */ @@ -643,7 +643,7 @@ }; static PyMappingMethods tuple_as_mapping = { - (inquiry)tuplelength, + (lenfunc)tuplelength, (binaryfunc)tuplesubscript, 0 }; Modified: python/branches/ssize_t/Objects/typeobject.c ============================================================================== --- python/branches/ssize_t/Objects/typeobject.c (original) +++ python/branches/ssize_t/Objects/typeobject.c Tue Jan 3 10:49:11 2006 @@ -3386,10 +3386,10 @@ entries, one regular and one with reversed arguments. 
*/ static PyObject * -wrap_inquiry(PyObject *self, PyObject *args, void *wrapped) +wrap_lenfunc(PyObject *self, PyObject *args, void *wrapped) { - inquiry func = (inquiry)wrapped; - int res; + lenfunc func = (lenfunc)wrapped; + Py_ssize_t res; if (!check_num_args(args, 0)) return NULL; @@ -4104,23 +4104,23 @@ "(" ARGCODES ")", arg1, arg2); \ } -static int +static Py_ssize_t slot_sq_length(PyObject *self) { static PyObject *len_str; PyObject *res = call_method(self, "__len__", &len_str, "()"); - long temp; - int len; + Py_ssize_t temp; + Py_ssize_t len; if (res == NULL) return -1; - temp = PyInt_AsLong(res); + temp = PyInt_AsSsize_t(res); len = (int)temp; Py_DECREF(res); if (len == -1 && PyErr_Occurred()) return -1; -#if SIZEOF_INT < SIZEOF_LONG - /* Overflow check -- range of PyInt is more than C int */ +#if SIZEOF_SIZE_T < SIZEOF_LONG + /* Overflow check -- range of PyInt is more than C ssize_t */ if (len != temp) { PyErr_SetString(PyExc_OverflowError, "__len__() should return 0 <= outcome < 2**31"); @@ -4958,7 +4958,7 @@ "x." NAME "(y) <==> " DOC) static slotdef slotdefs[] = { - SQSLOT("__len__", sq_length, slot_sq_length, wrap_inquiry, + SQSLOT("__len__", sq_length, slot_sq_length, wrap_lenfunc, "x.__len__() <==> len(x)"), /* Heap types defining __add__/__mul__ have sq_concat/sq_repeat == NULL. 
The logic in abstract.c always falls back to nb_add/nb_multiply in @@ -4997,7 +4997,7 @@ SQSLOT("__imul__", sq_inplace_repeat, NULL, wrap_ssizeargfunc, "x.__imul__(y) <==> x*=y"), - MPSLOT("__len__", mp_length, slot_mp_length, wrap_inquiry, + MPSLOT("__len__", mp_length, slot_mp_length, wrap_lenfunc, "x.__len__() <==> len(x)"), MPSLOT("__getitem__", mp_subscript, slot_mp_subscript, wrap_binaryfunc, Modified: python/branches/ssize_t/Objects/unicodeobject.c ============================================================================== --- python/branches/ssize_t/Objects/unicodeobject.c (original) +++ python/branches/ssize_t/Objects/unicodeobject.c Tue Jan 3 10:49:11 2006 @@ -5653,7 +5653,7 @@ return PyUnicode_Join(self, data); } -static int +static Py_ssize_t unicode_length(PyUnicodeObject *self) { return self->length; @@ -6449,7 +6449,7 @@ }; static PySequenceMethods unicode_as_sequence = { - (inquiry) unicode_length, /* sq_length */ + (lenfunc) unicode_length, /* sq_length */ (binaryfunc) PyUnicode_Concat, /* sq_concat */ (ssizeargfunc) unicode_repeat, /* sq_repeat */ (ssizeargfunc) unicode_getitem, /* sq_item */ @@ -6501,7 +6501,7 @@ } static PyMappingMethods unicode_as_mapping = { - (inquiry)unicode_length, /* mp_length */ + (lenfunc)unicode_length, /* mp_length */ (binaryfunc)unicode_subscript, /* mp_subscript */ (objobjargproc)0, /* mp_ass_subscript */ }; Modified: python/branches/ssize_t/Objects/weakrefobject.c ============================================================================== --- python/branches/ssize_t/Objects/weakrefobject.c (original) +++ python/branches/ssize_t/Objects/weakrefobject.c Tue Jan 3 10:49:11 2006 @@ -546,7 +546,7 @@ /* mapping slots */ -static int +static Py_ssize_t proxy_length(PyWeakReference *proxy) { if (!proxy_checkref(proxy)) @@ -625,7 +625,7 @@ }; static PySequenceMethods proxy_as_sequence = { - (inquiry)proxy_length, /*sq_length*/ + (lenfunc)proxy_length, /*sq_length*/ 0, /*sq_concat*/ 0, /*sq_repeat*/ 0, /*sq_item*/ @@ 
-636,7 +636,7 @@ }; static PyMappingMethods proxy_as_mapping = { - (inquiry)proxy_length, /*mp_length*/ + (lenfunc)proxy_length, /*mp_length*/ (binaryfunc)proxy_getitem, /*mp_subscript*/ (objobjargproc)proxy_setitem, /*mp_ass_subscript*/ }; From python-checkins at python.org Tue Jan 3 11:26:02 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 11:26:02 +0100 (CET) Subject: [Python-checkins] commit of r41905 - in python/branches/ssize_t: Include/dictobject.h Modules/_testcapimodule.c Modules/cPickle.c Modules/parsermodule.c Modules/selectmodule.c Modules/shamodule.c Objects/abstract.c Objects/dictobject.c Objects/funcobject.c Objects/moduleobject.c Objects/object.c Objects/setobject.c Objects/typeobject.c Python/bltinmodule.c Python/compile.c Python/getargs.c Python/import.c Python/marshal.c Python/symtable.c Message-ID: <20060103102602.0C6DB1E4002@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 11:25:55 2006 New Revision: 41905 Modified: python/branches/ssize_t/Include/dictobject.h python/branches/ssize_t/Modules/_testcapimodule.c python/branches/ssize_t/Modules/cPickle.c python/branches/ssize_t/Modules/parsermodule.c python/branches/ssize_t/Modules/selectmodule.c python/branches/ssize_t/Modules/shamodule.c python/branches/ssize_t/Objects/abstract.c python/branches/ssize_t/Objects/dictobject.c python/branches/ssize_t/Objects/funcobject.c python/branches/ssize_t/Objects/moduleobject.c python/branches/ssize_t/Objects/object.c python/branches/ssize_t/Objects/setobject.c python/branches/ssize_t/Objects/typeobject.c python/branches/ssize_t/Python/bltinmodule.c python/branches/ssize_t/Python/compile.c python/branches/ssize_t/Python/getargs.c python/branches/ssize_t/Python/import.c python/branches/ssize_t/Python/marshal.c python/branches/ssize_t/Python/symtable.c Log: Adjust more places to ssize_t Size/Length/Dict_Next. 
Modified: python/branches/ssize_t/Include/dictobject.h ============================================================================== --- python/branches/ssize_t/Include/dictobject.h (original) +++ python/branches/ssize_t/Include/dictobject.h Tue Jan 3 11:25:55 2006 @@ -95,11 +95,11 @@ PyAPI_FUNC(int) PyDict_DelItem(PyObject *mp, PyObject *key); PyAPI_FUNC(void) PyDict_Clear(PyObject *mp); PyAPI_FUNC(int) PyDict_Next( - PyObject *mp, int *pos, PyObject **key, PyObject **value); + PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value); PyAPI_FUNC(PyObject *) PyDict_Keys(PyObject *mp); PyAPI_FUNC(PyObject *) PyDict_Values(PyObject *mp); PyAPI_FUNC(PyObject *) PyDict_Items(PyObject *mp); -PyAPI_FUNC(int) PyDict_Size(PyObject *mp); +PyAPI_FUNC(Py_ssize_t) PyDict_Size(PyObject *mp); PyAPI_FUNC(PyObject *) PyDict_Copy(PyObject *mp); PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key); Modified: python/branches/ssize_t/Modules/_testcapimodule.c ============================================================================== --- python/branches/ssize_t/Modules/_testcapimodule.c (original) +++ python/branches/ssize_t/Modules/_testcapimodule.c Tue Jan 3 11:25:55 2006 @@ -115,7 +115,8 @@ static int test_dict_inner(int count) { - int pos = 0, iterations = 0, i; + Py_ssize_t pos = 0, iterations = 0; + int i; PyObject *dict = PyDict_New(); PyObject *v, *k; Modified: python/branches/ssize_t/Modules/cPickle.c ============================================================================== --- python/branches/ssize_t/Modules/cPickle.c (original) +++ python/branches/ssize_t/Modules/cPickle.c Tue Jan 3 11:25:55 2006 @@ -368,8 +368,8 @@ int *marks; int num_marks; int marks_size; - int (*read_func)(struct Unpicklerobject *, char **, int); - int (*readline_func)(struct Unpicklerobject *, char **); + Py_ssize_t (*read_func)(struct Unpicklerobject *, char **, Py_ssize_t); + Py_ssize_t (*readline_func)(struct Unpicklerobject *, char **); int buf_size; char *buf; PyObject 
*find_class; @@ -515,8 +515,8 @@ } -static int -read_file(Unpicklerobject *self, char **s, int n) +static Py_ssize_t +read_file(Unpicklerobject *self, char **s, Py_ssize_t n) { size_t nbytesread; @@ -559,7 +559,7 @@ } -static int +static Py_ssize_t readline_file(Unpicklerobject *self, char **s) { int i; @@ -598,8 +598,8 @@ } -static int -read_cStringIO(Unpicklerobject *self, char **s, int n) +static Py_ssize_t +read_cStringIO(Unpicklerobject *self, char **s, Py_ssize_t n) { char *ptr; @@ -614,10 +614,10 @@ } -static int +static Py_ssize_t readline_cStringIO(Unpicklerobject *self, char **s) { - int n; + Py_ssize_t n; char *ptr; if ((n = PycStringIO->creadline((PyObject *)self->file, &ptr)) < 0) { @@ -630,12 +630,12 @@ } -static int -read_other(Unpicklerobject *self, char **s, int n) +static Py_ssize_t +read_other(Unpicklerobject *self, char **s, Py_ssize_t n) { PyObject *bytes, *str=0; - if (!( bytes = PyInt_FromLong(n))) return -1; + if (!( bytes = PyInt_FromSsize_t(n))) return -1; ARG_TUP(self, bytes); if (self->arg) { @@ -652,11 +652,11 @@ } -static int +static Py_ssize_t readline_other(Unpicklerobject *self, char **s) { PyObject *str; - int str_size; + Py_ssize_t str_size; if (!( str = PyObject_CallObject(self->readline, empty_tuple))) { return -1; @@ -838,7 +838,7 @@ static PyObject * whichmodule(PyObject *global, PyObject *global_name) { - int i, j; + Py_ssize_t i, j; PyObject *module = 0, *modules_dict = 0, *global_name_attr = 0, *name = 0; @@ -3290,7 +3290,7 @@ static int load_counted_long(Unpicklerobject *self, int size) { - int i; + Py_ssize_t i; char *nbytes; unsigned char *pdata; PyObject *along; @@ -4263,7 +4263,7 @@ PyObject *state, *inst, *slotstate; PyObject *__setstate__; PyObject *d_key, *d_value; - int i; + Py_ssize_t i; int res = -1; /* Stack is ... instance, state. We want to leave instance at @@ -5720,7 +5720,7 @@ initcPickle(void) { PyObject *m, *d, *di, *v, *k; - int i; + Py_ssize_t i; char *rev = "1.71"; /* XXX when does this change? 
*/ PyObject *format_version; PyObject *compatible_formats; Modified: python/branches/ssize_t/Modules/parsermodule.c ============================================================================== --- python/branches/ssize_t/Modules/parsermodule.c (original) +++ python/branches/ssize_t/Modules/parsermodule.c Tue Jan 3 11:25:55 2006 @@ -632,8 +632,9 @@ static node* build_node_children(PyObject *tuple, node *root, int *line_num) { - int len = PyObject_Size(tuple); - int i, err; + Py_ssize_t len = PyObject_Size(tuple); + Py_ssize_t i; + int err; for (i = 1; i < len; ++i) { /* elem must always be a sequence, however simple */ @@ -663,7 +664,7 @@ return (0); } if (ISTERMINAL(type)) { - int len = PyObject_Size(elem); + Py_ssize_t len = PyObject_Size(elem); PyObject *temp; if ((len != 2) && (len != 3)) { Modified: python/branches/ssize_t/Modules/selectmodule.c ============================================================================== --- python/branches/ssize_t/Modules/selectmodule.c (original) +++ python/branches/ssize_t/Modules/selectmodule.c Tue Jan 3 11:25:55 2006 @@ -340,7 +340,7 @@ static int update_ufd_array(pollObject *self) { - int i, pos; + Py_ssize_t i, pos; PyObject *key, *value; self->ufd_len = PyDict_Size(self->dict); Modified: python/branches/ssize_t/Modules/shamodule.c ============================================================================== --- python/branches/ssize_t/Modules/shamodule.c (original) +++ python/branches/ssize_t/Modules/shamodule.c Tue Jan 3 11:25:55 2006 @@ -543,7 +543,7 @@ static PyObject * SHA_new(PyObject *self, PyObject *args, PyObject *kwdict) { - static char *kwlist[] = {"string", NULL}; + static const char *kwlist[] = {"string", NULL}; SHAobject *new; unsigned char *cp = NULL; int len; Modified: python/branches/ssize_t/Objects/abstract.c ============================================================================== --- python/branches/ssize_t/Objects/abstract.c (original) +++ python/branches/ssize_t/Objects/abstract.c Tue Jan 
3 11:25:55 2006 @@ -74,7 +74,7 @@ } #undef PyObject_Length -int +Py_ssize_t PyObject_Length(PyObject *o) { return PyObject_Size(o); @@ -84,7 +84,7 @@ int _PyObject_LengthCue(PyObject *o) { - int rv = PyObject_Size(o); + Py_ssize_t rv = PyObject_Size(o); if (rv != -1) return rv; if (PyErr_ExceptionMatches(PyExc_TypeError) || @@ -1103,7 +1103,7 @@ s->ob_type->tp_as_sequence->sq_item != NULL; } -int +Py_ssize_t PySequence_Size(PyObject *s) { PySequenceMethods *m; @@ -1122,7 +1122,7 @@ } #undef PySequence_Length -int +Py_ssize_t PySequence_Length(PyObject *s) { return PySequence_Size(s); Modified: python/branches/ssize_t/Objects/dictobject.c ============================================================================== --- python/branches/ssize_t/Objects/dictobject.c (original) +++ python/branches/ssize_t/Objects/dictobject.c Tue Jan 3 11:25:55 2006 @@ -690,7 +690,7 @@ * delete keys), via PyDict_SetItem(). */ int -PyDict_Next(PyObject *op, int *ppos, PyObject **pkey, PyObject **pvalue) +PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) { register int i, mask; register dictentry *ep; @@ -786,7 +786,7 @@ static PyObject * dict_repr(dictobject *mp) { - int i; + Py_ssize_t i; PyObject *s, *temp, *colon = NULL; PyObject *pieces = NULL, *result = NULL; PyObject *key, *value; @@ -1300,7 +1300,7 @@ return NULL; } -int +Py_ssize_t PyDict_Size(PyObject *mp) { if (mp == NULL || !PyDict_Check(mp)) { @@ -1708,7 +1708,8 @@ static int dict_traverse(PyObject *op, visitproc visit, void *arg) { - int i = 0, err; + Py_ssize_t i = 0; + int err; PyObject *pk; PyObject *pv; Modified: python/branches/ssize_t/Objects/funcobject.c ============================================================================== --- python/branches/ssize_t/Objects/funcobject.c (original) +++ python/branches/ssize_t/Objects/funcobject.c Tue Jan 3 11:25:55 2006 @@ -529,7 +529,7 @@ } if (kw != NULL && PyDict_Check(kw)) { - int pos, i; + Py_ssize_t pos, i; nk = PyDict_Size(kw); k = 
PyMem_NEW(PyObject *, 2*nk); if (k == NULL) { Modified: python/branches/ssize_t/Objects/moduleobject.c ============================================================================== --- python/branches/ssize_t/Objects/moduleobject.c (original) +++ python/branches/ssize_t/Objects/moduleobject.c Tue Jan 3 11:25:55 2006 @@ -104,7 +104,7 @@ None, rather than deleting them from the dictionary, to avoid rehashing the dictionary (to some extent). */ - int pos; + Py_ssize_t pos; PyObject *key, *value; PyObject *d; Modified: python/branches/ssize_t/Objects/object.c ============================================================================== --- python/branches/ssize_t/Objects/object.c (original) +++ python/branches/ssize_t/Objects/object.c Tue Jan 3 11:25:55 2006 @@ -1949,7 +1949,7 @@ /* Hack to force loading of abstract.o */ -int (*_Py_abstract_hack)(PyObject *) = PyObject_Size; +Py_ssize_t (*_Py_abstract_hack)(PyObject *) = PyObject_Size; /* Python's malloc wrappers (see pymem.h) */ Modified: python/branches/ssize_t/Objects/setobject.c ============================================================================== --- python/branches/ssize_t/Objects/setobject.c (original) +++ python/branches/ssize_t/Objects/setobject.c Tue Jan 3 11:25:55 2006 @@ -468,9 +468,10 @@ * mutates the table. 
*/ static int -set_next(PySetObject *so, int *pos_ptr, setentry **entry_ptr) +set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) { - register int i, mask; + Py_ssize_t i; + int mask; register setentry *table; assert (PyAnySet_Check(so)); @@ -517,7 +518,7 @@ set_tp_print(PySetObject *so, FILE *fp, int flags) { setentry *entry; - int pos=0; + Py_ssize_t pos=0; char *emit = ""; /* No separator emitted on first pass */ char *separator = ", "; @@ -673,7 +674,7 @@ static int set_traverse(PySetObject *so, visitproc visit, void *arg) { - int pos = 0; + Py_ssize_t pos = 0; setentry *entry; while (set_next(so, &pos, &entry)) @@ -687,7 +688,7 @@ PySetObject *so = (PySetObject *)self; long h, hash = 1927868237L; setentry *entry; - int pos = 0; + Py_ssize_t pos = 0; if (so->hash != -1) return so->hash; @@ -847,7 +848,7 @@ if (PyDict_Check(other)) { PyObject *key, *value; - int pos = 0; + Py_ssize_t pos = 0; while (PyDict_Next(other, &pos, &key, &value)) { if (set_add_key(so, key) == -1) return -1; @@ -1121,7 +1122,7 @@ return NULL; if (PyAnySet_Check(other)) { - int pos = 0; + Py_ssize_t pos = 0; setentry *entry; if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) { @@ -1222,7 +1223,7 @@ if (PyAnySet_Check(other)) { setentry *entry; - int pos = 0; + Py_ssize_t pos = 0; while (set_next((PySetObject *)other, &pos, &entry)) set_discard_entry(so, entry); @@ -1266,7 +1267,7 @@ { PyObject *result; setentry *entry; - int pos = 0; + Py_ssize_t pos = 0; if (!PyAnySet_Check(other) && !PyDict_Check(other)) { result = set_copy(so); @@ -1340,7 +1341,7 @@ { PySetObject *otherset; PyObject *key; - int pos = 0; + Py_ssize_t pos = 0; setentry *entry; if ((PyObject *)so == other) @@ -1442,7 +1443,7 @@ set_issubset(PySetObject *so, PyObject *other) { setentry *entry; - int pos = 0; + Py_ssize_t pos = 0; if (!PyAnySet_Check(other)) { PyObject *tmp, *result; Modified: python/branches/ssize_t/Objects/typeobject.c 
============================================================================== --- python/branches/ssize_t/Objects/typeobject.c (original) +++ python/branches/ssize_t/Objects/typeobject.c Tue Jan 3 11:25:55 2006 @@ -1101,7 +1101,7 @@ static void set_mro_error(PyObject *to_merge, int *remain) { - int i, n, off, to_merge_size; + Py_ssize_t i, n, off, to_merge_size; char buf[1000]; PyObject *k, *v; PyObject *set = PyDict_New(); Modified: python/branches/ssize_t/Python/bltinmodule.c ============================================================================== --- python/branches/ssize_t/Python/bltinmodule.c (original) +++ python/branches/ssize_t/Python/bltinmodule.c Tue Jan 3 11:25:55 2006 @@ -824,7 +824,7 @@ PyObject *func, *result; sequence *seqs = NULL, *sqp; - int n, len; + Py_ssize_t n, len; register int i, j; n = PyTuple_Size(args); @@ -1163,12 +1163,12 @@ static PyObject * builtin_len(PyObject *self, PyObject *v) { - long res; + Py_ssize_t res; res = PyObject_Size(v); if (res < 0 && PyErr_Occurred()) return NULL; - return PyInt_FromLong(res); + return PyInt_FromSsize_t(res); } PyDoc_STRVAR(len_doc, @@ -2346,8 +2346,8 @@ filtertuple(PyObject *func, PyObject *tuple) { PyObject *result; - register int i, j; - int len = PyTuple_Size(tuple); + Py_ssize_t i, j; + Py_ssize_t len = PyTuple_Size(tuple); if (len == 0) { if (PyTuple_CheckExact(tuple)) @@ -2417,9 +2417,9 @@ filterstring(PyObject *func, PyObject *strobj) { PyObject *result; - register int i, j; - int len = PyString_Size(strobj); - int outlen = len; + Py_ssize_t i, j; + Py_ssize_t len = PyString_Size(strobj); + Py_ssize_t outlen = len; if (func == Py_None) { /* If it's a real string we can return the original, Modified: python/branches/ssize_t/Python/compile.c ============================================================================== --- python/branches/ssize_t/Python/compile.c (original) +++ python/branches/ssize_t/Python/compile.c Tue Jan 3 11:25:55 2006 @@ -317,7 +317,7 @@ static PyObject * 
list2dict(PyObject *list) { - int i, n; + Py_ssize_t i, n; PyObject *v, *k, *dict = PyDict_New(); n = PyList_Size(list); @@ -352,7 +352,7 @@ static PyObject * dictbytype(PyObject *src, int scope_type, int flag, int offset) { - int pos = 0, i = offset, scope; + Py_ssize_t pos = 0, i = offset, scope; PyObject *k, *v, *dest = PyDict_New(); assert(offset >= 0); @@ -407,7 +407,7 @@ tuple_of_constants(unsigned char *codestr, int n, PyObject *consts) { PyObject *newconst, *constant; - int i, arg, len_consts; + Py_ssize_t i, arg, len_consts; /* Pre-conditions */ assert(PyList_CheckExact(consts)); @@ -458,7 +458,8 @@ fold_binops_on_constants(unsigned char *codestr, PyObject *consts) { PyObject *newconst, *v, *w; - int len_consts, opcode, size; + Py_ssize_t len_consts, size; + int opcode; /* Pre-conditions */ assert(PyList_CheckExact(consts)); @@ -551,7 +552,8 @@ fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts) { PyObject *newconst=NULL, *v; - int len_consts, opcode; + Py_ssize_t len_consts; + int opcode; /* Pre-conditions */ assert(PyList_CheckExact(consts)); @@ -653,7 +655,8 @@ static PyObject * optimize_code(PyObject *code, PyObject* consts, PyObject *names, PyObject *lineno_obj) { - int i, j, codelen, nops, h, adj; + Py_ssize_t i, j, codelen; + int nops, h, adj; int tgt, tgttgt, opcode; unsigned char *codestr = NULL; unsigned char *lineno; @@ -989,7 +992,8 @@ compiler_display_symbols(PyObject *name, PyObject *symbols) { PyObject *key, *value; - int flags, pos = 0; + int flags; + Py_ssize_t pos = 0; fprintf(stderr, "block %s\n", PyString_AS_STRING(name)); while (PyDict_Next(symbols, &pos, &key, &value)) { @@ -1498,7 +1502,7 @@ compiler_add_o(struct compiler *c, PyObject *dict, PyObject *o) { PyObject *t, *v; - int arg; + Py_ssize_t arg; /* necessary to make sure types aren't coerced (e.g., int and long) */ t = PyTuple_Pack(2, o, o->ob_type); @@ -4032,7 +4036,7 @@ dict_keys_inorder(PyObject *dict, int offset) { PyObject *tuple, *k, *v; - int i, pos = 0, 
size = PyDict_Size(dict); + Py_ssize_t i, pos = 0, size = PyDict_Size(dict); tuple = PyTuple_New(size); if (tuple == NULL) Modified: python/branches/ssize_t/Python/getargs.c ============================================================================== --- python/branches/ssize_t/Python/getargs.c (original) +++ python/branches/ssize_t/Python/getargs.c Tue Jan 3 11:25:55 2006 @@ -1505,7 +1505,7 @@ /* make sure there are no extraneous keyword arguments */ if (nkeywords > 0) { PyObject *key, *value; - int pos = 0; + Py_ssize_t pos = 0; while (PyDict_Next(keywords, &pos, &key, &value)) { int match = 0; char *ks; Modified: python/branches/ssize_t/Python/import.c ============================================================================== --- python/branches/ssize_t/Python/import.c (original) +++ python/branches/ssize_t/Python/import.c Tue Jan 3 11:25:55 2006 @@ -351,7 +351,7 @@ void PyImport_Cleanup(void) { - int pos, ndone; + Py_ssize_t pos, ndone; char *name; PyObject *key, *value, *dict; PyInterpreterState *interp = PyThreadState_GET()->interp; Modified: python/branches/ssize_t/Python/marshal.c ============================================================================== --- python/branches/ssize_t/Python/marshal.c (original) +++ python/branches/ssize_t/Python/marshal.c Tue Jan 3 11:25:55 2006 @@ -59,7 +59,7 @@ static void w_more(int c, WFILE *p) { - int size, newsize; + Py_ssize_t size, newsize; if (p->str == NULL) return; /* An error already occurred */ size = PyString_Size(p->str); @@ -117,7 +117,7 @@ static void w_object(PyObject *v, WFILE *p) { - int i, n; + Py_ssize_t i, n; p->depth++; @@ -236,7 +236,7 @@ goto exit; } else { - o = PyInt_FromLong(PyDict_Size(p->strings)); + o = PyInt_FromSsize_t(PyDict_Size(p->strings)); PyDict_SetItem(p->strings, v, o); Py_DECREF(o); w_byte(TYPE_INTERNED, p); @@ -282,7 +282,7 @@ } } else if (PyDict_Check(v)) { - int pos; + Py_ssize_t pos; PyObject *key, *value; w_byte(TYPE_DICT, p); /* This one is NULL object terminated! 
*/ Modified: python/branches/ssize_t/Python/symtable.c ============================================================================== --- python/branches/ssize_t/Python/symtable.c (original) +++ python/branches/ssize_t/Python/symtable.c Tue Jan 3 11:25:55 2006 @@ -426,7 +426,8 @@ analyze_cells(PyObject *scope, PyObject *free) { PyObject *name, *v, *w; - int flags, pos = 0, success = 0; + int flags, success = 0; + Py_ssize_t pos = 0; w = PyInt_FromLong(CELL); if (!w) @@ -506,7 +507,8 @@ PyObject *bound, PyObject *free, int class) { PyObject *name, *v, *u, *w, *free_value = NULL; - int i, flags, pos = 0; + int i, flags; + Py_ssize_t pos = 0; while (PyDict_Next(symbols, &pos, &name, &v)) { assert(PyInt_Check(v)); @@ -581,7 +583,8 @@ { PyObject *name, *v, *local = NULL, *scope = NULL, *newbound = NULL; PyObject *newglobal = NULL, *newfree = NULL; - int i, flags, pos = 0, success = 0; + int i, flags, success = 0; + Py_ssize_t pos = 0; local = PyDict_New(); if (!local) From python-checkins at python.org Tue Jan 3 14:16:55 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 3 Jan 2006 14:16:55 +0100 (CET) Subject: [Python-checkins] commit of r41906 - python/branches/ssize_t/Objects/obmalloc.c Message-ID: <20060103131655.201471E400A@bag.python.org> Author: martin.v.loewis Date: Tue Jan 3 14:16:53 2006 New Revision: 41906 Modified: python/branches/ssize_t/Objects/obmalloc.c Log: Disable 32-bit size limitation for 64-bit mode. 
Modified: python/branches/ssize_t/Objects/obmalloc.c ============================================================================== --- python/branches/ssize_t/Objects/obmalloc.c (original) +++ python/branches/ssize_t/Objects/obmalloc.c Tue Jan 3 14:16:53 2006 @@ -1005,6 +1005,8 @@ bumpserialno(); total = nbytes + 16; +#if SIZEOF_SIZE_T < 8 + /* XXX do this check only on 32-bit machines */ if (total < nbytes || (total >> 31) > 1) { /* overflow, or we can't represent it in 4 bytes */ /* Obscure: can't do (total >> 32) != 0 instead, because @@ -1013,6 +1015,7 @@ size_t is an unsigned type. */ return NULL; } +#endif p = (uchar *)PyObject_Malloc(total); if (p == NULL) From python-checkins at python.org Tue Jan 3 15:30:57 2006 From: python-checkins at python.org (barry.warsaw) Date: Tue, 3 Jan 2006 15:30:57 +0100 (CET) Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in Message-ID: <20060103143057.4DB8E1E401C@bag.python.org> Author: barry.warsaw Date: Tue Jan 3 15:30:55 2006 New Revision: 41907 Modified: python/trunk/Makefile.pre.in Log: SF bug #1395926: Also test for svnversion command on $PATH before using it to calculate the build number. Modified: python/trunk/Makefile.pre.in ============================================================================== --- python/trunk/Makefile.pre.in (original) +++ python/trunk/Makefile.pre.in Tue Jan 3 15:30:55 2006 @@ -349,7 +349,7 @@ $(SIGNAL_OBJS) \ $(MODOBJS) \ $(srcdir)/Modules/getbuildinfo.c - if test -d $(srcdir)/.svn; then \ + if test -d $(srcdir)/.svn -a ! 
-z "`type -t svnversion`" ; then \ svnversion $(srcdir) >buildno; \ elif test -f buildno; then \ expr `cat buildno` + 1 >buildno1; \ From nnorwitz at gmail.com Tue Jan 3 18:29:21 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Tue, 3 Jan 2006 09:29:21 -0800 Subject: [Python-checkins] commit of r41903 - python/branches/ssize_t/Objects/typeobject.c In-Reply-To: <20060103090924.C0BCC1E4009@bag.python.org> References: <20060103090924.C0BCC1E4009@bag.python.org> Message-ID: On 1/3/06, martin.v.loewis wrote: > Log: > Update wrappers for ssize_t. > { > ssizeargfunc func = (ssizeargfunc)wrapped; > - int i; > + Py_ssize_t i; > > - if (!PyArg_ParseTuple(args, "i", &i)) > + if (!PyArg_ParseTuple(args, "n", &i)) > return NULL; > return (*func)(self, i); > } Thanks Martin. I realized I missed those last night. n From tim.peters at gmail.com Tue Jan 3 19:38:03 2006 From: tim.peters at gmail.com (Tim Peters) Date: Tue, 3 Jan 2006 13:38:03 -0500 Subject: [Python-checkins] commit of r41906 - python/branches/ssize_t/Objects/obmalloc.c In-Reply-To: <20060103131655.201471E400A@bag.python.org> References: <20060103131655.201471E400A@bag.python.org> Message-ID: <1f7befae0601031038w6f6c6f03o4549e039dafd19a8@mail.gmail.com> > Author: martin.v.loewis > Date: Tue Jan 3 14:16:53 2006 > New Revision: 41906 > > Modified: > python/branches/ssize_t/Objects/obmalloc.c > Log: > Disable 32-bit size limitation for 64-bit mode. 
> > > Modified: python/branches/ssize_t/Objects/obmalloc.c > ============================================================================== > --- python/branches/ssize_t/Objects/obmalloc.c (original) > +++ python/branches/ssize_t/Objects/obmalloc.c Tue Jan 3 14:16:53 2006 > @@ -1005,6 +1005,8 @@ > > bumpserialno(); > total = nbytes + 16; > +#if SIZEOF_SIZE_T < 8 > + /* XXX do this check only on 32-bit machines */ > if (total < nbytes || (total >> 31) > 1) { > /* overflow, or we can't represent it in 4 bytes */ > /* Obscure: can't do (total >> 32) != 0 instead, because > @@ -1013,6 +1015,7 @@ > size_t is an unsigned type. */ > return NULL; > } > +#endif This checkin should be reverted for now. It's in _PyObject_DebugMalloc, and at present the layout of the extra debug info in a PYMALLOC_DEBUG build is hard-coded to use 4-byte fields, no matter what sizeof(size_t) may be. One of the extra fields recorded in a PYMALLOC_DEBUG build is the number of bytes requested, and at present it's simply not capable of recording a value that doesn't fit in 4 bytes. Even after (if ever ;-)) this is changed to support recording 8-byte values on a box where sizeof(size_t) == 8, the "total < nbytes" part of the test would still be appropriate: PyObject_DebugMalloc requests more memory (`total`) than the user asked for (`nbytes`), and the computation of `total` may have overflowed. That's what "total < nbytes" is checking, and that's the right way to spell the overflow check on any box. From python-checkins at python.org Tue Jan 3 22:35:25 2006 From: python-checkins at python.org (phillip.eby) Date: Tue, 3 Jan 2006 22:35:25 +0100 (CET) Subject: [Python-checkins] commit of r41908 - sandbox/trunk/setuptools/EasyInstall.txt Message-ID: <20060103213525.542B41E4002@bag.python.org> Author: phillip.eby Date: Tue Jan 3 22:35:22 2006 New Revision: 41908 Modified: sandbox/trunk/setuptools/EasyInstall.txt Log: Tweak windows environment variable doc, per user feedback. 
Modified: sandbox/trunk/setuptools/EasyInstall.txt ============================================================================== --- sandbox/trunk/setuptools/EasyInstall.txt (original) +++ sandbox/trunk/setuptools/EasyInstall.txt Tue Jan 3 22:35:22 2006 @@ -98,9 +98,10 @@ most current versions of Windows, you can change the ``PATH`` by right-clicking "My Computer", choosing "Properties" and selecting the "Advanced" tab, then clicking the "Environment Variables" button. ``PATH`` will be in the "System -Variables" section, and you will probably need to reboot for the change to take -effect. Be sure to add a ``;`` after the last item on ``PATH`` before adding -the scripts directory to it. +Variables" section, and you will need to exit and restart your command shell +(command.com, cmd.exe, bash, or other) for the change to take effect. Be sure +to add a ``;`` after the last item on ``PATH`` before adding the scripts +directory to it. Note that instead of changing your ``PATH`` to include the Python scripts directory, you can also retarget the installation location for scripts so they From martin at v.loewis.de Tue Jan 3 22:50:20 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Tue, 03 Jan 2006 22:50:20 +0100 Subject: [Python-checkins] commit of r41903 - python/branches/ssize_t/Objects/typeobject.c In-Reply-To: References: <20060103090924.C0BCC1E4009@bag.python.org> Message-ID: <43BAF19C.60207@v.loewis.de> Neal Norwitz wrote: > Thanks Martin. I realized I missed those last night. I just started compiling this branch under VS2005 for AMD64, and it reported *many* more truncations from Py_ssize_t to int (also from ptrdiff_t to int, and from size_t to int). Still a lot to do. 
Regards, Martin From nnorwitz at gmail.com Tue Jan 3 22:53:54 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Tue, 3 Jan 2006 13:53:54 -0800 Subject: [Python-checkins] commit of r41903 - python/branches/ssize_t/Objects/typeobject.c In-Reply-To: <43BAF19C.60207@v.loewis.de> References: <20060103090924.C0BCC1E4009@bag.python.org> <43BAF19C.60207@v.loewis.de> Message-ID: On 1/3/06, "Martin v. Löwis" wrote: > Neal Norwitz wrote: > > Thanks Martin. I realized I missed those last night. > > I just started compiling this branch under VS2005 for > AMD64, and it reported *many* more truncations from > Py_ssize_t to int (also from ptrdiff_t to int, and > from size_t to int). > > Still a lot to do. Do you know if there is a way to produce these warnings with gcc? If I use g++ that will give me signed-ness warnings, but I'm not sure about the others. But if I use g++, I'll have to modify a lot of the code to not use C++ reserved words. I'm not sure that's worth the pain. n From martin at v.loewis.de Tue Jan 3 22:59:42 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Tue, 03 Jan 2006 22:59:42 +0100 Subject: [Python-checkins] commit of r41906 - python/branches/ssize_t/Objects/obmalloc.c In-Reply-To: <1f7befae0601031038w6f6c6f03o4549e039dafd19a8@mail.gmail.com> References: <20060103131655.201471E400A@bag.python.org> <1f7befae0601031038w6f6c6f03o4549e039dafd19a8@mail.gmail.com> Message-ID: <43BAF3CE.7040506@v.loewis.de> Tim Peters wrote: >>Modified: python/branches/ssize_t/Objects/obmalloc.c [...] > This checkin should be reverted for now. Not sure whether you've noticed this is "just" on the ssize_t branch. Without this patch, it is not possible to allocate 4GiB or more for a string object in debug mode, which kind of defeated my attempts to test that. I certainly plan to remove all XXX marks I have introduced in that branch before suggesting to integrate it back into the trunk. 
So "for now", I would prefer to keep it, and only revert it if I have a complete fix. > It's in > _PyObject_DebugMalloc, and at present the layout of the extra debug > info in a PYMALLOC_DEBUG build is hard-coded to use 4-byte fields, no > matter what sizeof(size_t) may be. One of the extra fields recorded > in a PYMALLOC_DEBUG build is the number of bytes requested, and at > present it's simply not capable of recording a value that doesn't fit > in 4 bytes. Well, AFAICT, it "works" even if it only records the lower 4 bytes of the requested size. Upon freeing, it just won't put enough DEADBYTEs in, which I cannot see having further unfortunate consequences (except that it won't diagnose errors as well anymore as it could). > Even after (if ever ;-)) this is changed to support recording 8-byte > values on a box where sizeof(size_t) == 8, the "total < nbytes" part > of the test would still be appropriate: PyObject_DebugMalloc requests > more memory (`total`) than the user asked for (`nbytes`), and the > computation of `total` may have overflowed. That's what "total < > nbytes" is checking, and that's the right way to spell the overflow > check on any box. Certainly; I did not mean to completely disable this test. Regards, Martin From martin at v.loewis.de Tue Jan 3 23:42:41 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Tue, 03 Jan 2006 23:42:41 +0100 Subject: [Python-checkins] commit of r41903 - python/branches/ssize_t/Objects/typeobject.c In-Reply-To: References: <20060103090924.C0BCC1E4009@bag.python.org> <43BAF19C.60207@v.loewis.de> Message-ID: <43BAFDE1.2090001@v.loewis.de> Neal Norwitz wrote: > Do you know if there is a way to produce these warnings with gcc? If > I use g++ that will give me signed-ness warnings, but I'm not sure > about the others. But if I use g++, I'll have to modify a lot of the > code to not use C++ reserved words. I'm not sure that's worth the > pain. 
Neither gcc nor g++ can report integer truncations, unfortunately. Perhaps a -Wtruncation flag would be in order. Regards, Martin From python-checkins at python.org Tue Jan 3 23:54:35 2006 From: python-checkins at python.org (skip.montanaro) Date: Tue, 3 Jan 2006 23:54:35 +0100 (CET) Subject: [Python-checkins] commit of r41848 - python/trunk/setup.py Message-ID: <20060103225435.909711E4002@bag.python.org> Author: skip.montanaro Date: Fri Dec 30 06:01:26 2005 New Revision: 41848 Modified: python/trunk/setup.py Log: typo, use parens for continued expr Modified: python/trunk/setup.py ============================================================================== --- python/trunk/setup.py (original) +++ python/trunk/setup.py Fri Dec 30 06:01:26 2005 @@ -1000,7 +1000,7 @@ join(os.getenv('HOME'), '/Library/Frameworks') ] - # Find the directory that contains the Tcl.framwork and Tk.framework + # Find the directory that contains the Tcl.framework and Tk.framework # bundles. # XXX distutils should support -F! for F in framework_dirs: @@ -1051,8 +1051,8 @@ # AquaTk is a separate method. Only one Tkinter will be built on # Darwin - either AquaTk, if it is found, or X11 based Tk. 
platform = self.get_platform() - if platform == 'darwin' and \ - self.detect_tkinter_darwin(inc_dirs, lib_dirs): + if (platform == 'darwin' and + self.detect_tkinter_darwin(inc_dirs, lib_dirs)): return # Assume we haven't found any of the libraries or include files From python-checkins at python.org Wed Jan 4 00:10:21 2006 From: python-checkins at python.org (phillip.eby) Date: Wed, 4 Jan 2006 00:10:21 +0100 (CET) Subject: [Python-checkins] commit of r41909 - in sandbox/trunk/setuptools: pkg_resources.py setuptools/tests/test_resources.py Message-ID: <20060103231021.31DD01E4002@bag.python.org> Author: phillip.eby Date: Wed Jan 4 00:10:20 2006 New Revision: 41909 Modified: sandbox/trunk/setuptools/pkg_resources.py sandbox/trunk/setuptools/setuptools/tests/test_resources.py Log: Make it clearer that Requirement.parse() is the only way for users to create correct Requirement instances. Modified: sandbox/trunk/setuptools/pkg_resources.py ============================================================================== --- sandbox/trunk/setuptools/pkg_resources.py (original) +++ sandbox/trunk/setuptools/pkg_resources.py Wed Jan 4 00:10:20 2006 @@ -2049,9 +2049,9 @@ class Requirement: - def __init__(self, project_name, specs=(), extras=()): - self.project_name = project_name - self.key = project_name.lower() + def __init__(self, project_name, specs, extras): + """DO NOT CALL THIS UNDOCUMENTED METHOD; use Requirement.parse()!""" + self.project_name, self.key = project_name, project_name.lower() index = [(parse_version(v),state_machine[op],op,v) for op,v in specs] index.sort() self.specs = [(op,ver) for parsed,trans,op,ver in index] Modified: sandbox/trunk/setuptools/setuptools/tests/test_resources.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/tests/test_resources.py (original) +++ sandbox/trunk/setuptools/setuptools/tests/test_resources.py Wed Jan 4 00:10:20 2006 @@ -291,22 +291,22 @@ r = 
Requirement.parse("Twisted>=1.2") self.assertEqual(str(r),"Twisted>=1.2") self.assertEqual(repr(r),"Requirement.parse('Twisted>=1.2')") - self.assertEqual(r, Requirement("Twisted", [('>=','1.2')])) - self.assertEqual(r, Requirement("twisTed", [('>=','1.2')])) - self.assertNotEqual(r, Requirement("Twisted", [('>=','2.0')])) - self.assertNotEqual(r, Requirement("Zope", [('>=','1.2')])) - self.assertNotEqual(r, Requirement("Zope", [('>=','3.0')])) + self.assertEqual(r, Requirement("Twisted", [('>=','1.2')], ())) + self.assertEqual(r, Requirement("twisTed", [('>=','1.2')], ())) + self.assertNotEqual(r, Requirement("Twisted", [('>=','2.0')], ())) + self.assertNotEqual(r, Requirement("Zope", [('>=','1.2')], ())) + self.assertNotEqual(r, Requirement("Zope", [('>=','3.0')], ())) self.assertNotEqual(r, Requirement.parse("Twisted[extras]>=1.2")) def testOrdering(self): - r1 = Requirement("Twisted", [('==','1.2c1'),('>=','1.2')]) - r2 = Requirement("Twisted", [('>=','1.2'),('==','1.2c1')]) + r1 = Requirement("Twisted", [('==','1.2c1'),('>=','1.2')], ()) + r2 = Requirement("Twisted", [('>=','1.2'),('==','1.2c1')], ()) self.assertEqual(r1,r2) self.assertEqual(str(r1),str(r2)) self.assertEqual(str(r2),"Twisted==1.2c1,>=1.2") def testBasicContains(self): - r = Requirement("Twisted", [('>=','1.2')]) + r = Requirement("Twisted", [('>=','1.2')], ()) foo_dist = Distribution.from_filename("FooPkg-1.3_1.egg") twist11 = Distribution.from_filename("Twisted-1.1.egg") twist12 = Distribution.from_filename("Twisted-1.2.egg") @@ -418,15 +418,15 @@ def testSimpleRequirements(self): self.assertEqual( list(parse_requirements('Twis-Ted>=1.2-1')), - [Requirement('Twis-Ted',[('>=','1.2-1')])] + [Requirement('Twis-Ted',[('>=','1.2-1')], ())] ) self.assertEqual( list(parse_requirements('Twisted >=1.2, \ # more\n<2.0')), - [Requirement('Twisted',[('>=','1.2'),('<','2.0')])] + [Requirement('Twisted',[('>=','1.2'),('<','2.0')], ())] ) self.assertEqual( Requirement.parse("FooBar==1.99a3"), - 
Requirement("FooBar", [('==','1.99a3')]) + Requirement("FooBar", [('==','1.99a3')], ()) ) self.assertRaises(ValueError,Requirement.parse,">=2.3") self.assertRaises(ValueError,Requirement.parse,"x\\") From python-checkins at python.org Wed Jan 4 02:30:18 2006 From: python-checkins at python.org (hyeshik.chang) Date: Wed, 4 Jan 2006 02:30:18 +0100 (CET) Subject: [Python-checkins] commit of r41910 - python/trunk/Python/dynload_shlib.c Message-ID: <20060104013018.6494A1E402E@bag.python.org> Author: hyeshik.chang Date: Wed Jan 4 02:30:17 2006 New Revision: 41910 Modified: python/trunk/Python/dynload_shlib.c Log: Fix a warning from gcc by adding a missed const qualifier. Modified: python/trunk/Python/dynload_shlib.c ============================================================================== --- python/trunk/Python/dynload_shlib.c (original) +++ python/trunk/Python/dynload_shlib.c Wed Jan 4 02:30:17 2006 @@ -130,7 +130,7 @@ handle = dlopen(pathname, dlopenflags); if (handle == NULL) { - char *error = dlerror(); + const char *error = dlerror(); if (error == NULL) error = "unknown dlopen() error"; PyErr_SetString(PyExc_ImportError, error); From nnorwitz at gmail.com Wed Jan 4 07:05:33 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Tue, 3 Jan 2006 22:05:33 -0800 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <20060103143057.4DB8E1E401C@bag.python.org> References: <20060103143057.4DB8E1E401C@bag.python.org> Message-ID: This change broke on Solaris 10. http://www.python.org/dev/buildbot/sparc%20solaris10%20gcc%20trunk/builds/48/step-compile/0 make: Fatal error: Command failed for target `buildno' n -- On 1/3/06, barry.warsaw wrote: > Author: barry.warsaw > Date: Tue Jan 3 15:30:55 2006 > New Revision: 41907 > > Modified: > python/trunk/Makefile.pre.in > Log: > SF bug #1395926: Also test for svnversion command on $PATH before using it to > calculate the build number. 
> > > Modified: python/trunk/Makefile.pre.in > ============================================================================== > --- python/trunk/Makefile.pre.in (original) > +++ python/trunk/Makefile.pre.in Tue Jan 3 15:30:55 2006 > @@ -349,7 +349,7 @@ > $(SIGNAL_OBJS) \ > $(MODOBJS) \ > $(srcdir)/Modules/getbuildinfo.c > - if test -d $(srcdir)/.svn; then \ > + if test -d $(srcdir)/.svn -a ! -z "`type -t svnversion`" ; then \ > svnversion $(srcdir) >buildno; \ > elif test -f buildno; then \ > expr `cat buildno` + 1 >buildno1; \ > _______________________________________________ > Python-checkins mailing list > Python-checkins at python.org > http://mail.python.org/mailman/listinfo/python-checkins > From python-checkins at python.org Wed Jan 4 09:28:07 2006 From: python-checkins at python.org (skip.montanaro) Date: Wed, 4 Jan 2006 09:28:07 +0100 (CET) Subject: [Python-checkins] r41848 - python/trunk/setup.py Message-ID: <20060104082807.C0A351E4002@bag.python.org> Author: skip.montanaro Date: Fri Dec 30 06:01:26 2005 New Revision: 41848 Modified: python/trunk/setup.py Log: typo, use parens for continued expr Modified: python/trunk/setup.py ============================================================================== --- python/trunk/setup.py (original) +++ python/trunk/setup.py Fri Dec 30 06:01:26 2005 @@ -1000,7 +1000,7 @@ join(os.getenv('HOME'), '/Library/Frameworks') ] - # Find the directory that contains the Tcl.framwork and Tk.framework + # Find the directory that contains the Tcl.framework and Tk.framework # bundles. # XXX distutils should support -F! for F in framework_dirs: @@ -1051,8 +1051,8 @@ # AquaTk is a separate method. Only one Tkinter will be built on # Darwin - either AquaTk, if it is found, or X11 based Tk. 
platform = self.get_platform() - if platform == 'darwin' and \ - self.detect_tkinter_darwin(inc_dirs, lib_dirs): + if (platform == 'darwin' and + self.detect_tkinter_darwin(inc_dirs, lib_dirs)): return # Assume we haven't found any of the libraries or include files From sjoerd at acm.org Wed Jan 4 10:29:06 2006 From: sjoerd at acm.org (Sjoerd Mullender) Date: Wed, 04 Jan 2006 10:29:06 +0100 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: References: <20060103143057.4DB8E1E401C@bag.python.org> Message-ID: <43BB9562.2070404@acm.org> Neal Norwitz wrote: > This change broke on Solaris 10. > > http://www.python.org/dev/buildbot/sparc%20solaris10%20gcc%20trunk/builds/48/step-compile/0 > > make: Fatal error: Command failed for target `buildno' The program "type" is not portable: it's a bash-ism. > n > -- > > On 1/3/06, barry.warsaw wrote: > >>Author: barry.warsaw >>Date: Tue Jan 3 15:30:55 2006 >>New Revision: 41907 >> >>Modified: >> python/trunk/Makefile.pre.in >>Log: >>SF bug #1395926: Also test for svnversion command on $PATH before using it to >>calculate the build number. >> >> >>Modified: python/trunk/Makefile.pre.in >>============================================================================== >>--- python/trunk/Makefile.pre.in (original) >>+++ python/trunk/Makefile.pre.in Tue Jan 3 15:30:55 2006 >>@@ -349,7 +349,7 @@ >> $(SIGNAL_OBJS) \ >> $(MODOBJS) \ >> $(srcdir)/Modules/getbuildinfo.c >>- if test -d $(srcdir)/.svn; then \ >>+ if test -d $(srcdir)/.svn -a ! 
-z "`type -t svnversion`" ; then \ >> svnversion $(srcdir) >buildno; \ >> elif test -f buildno; then \ >> expr `cat buildno` + 1 >buildno1; \ >>_______________________________________________ >>Python-checkins mailing list >>Python-checkins at python.org >>http://mail.python.org/mailman/listinfo/python-checkins >> > > _______________________________________________ > Python-checkins mailing list > Python-checkins at python.org > http://mail.python.org/mailman/listinfo/python-checkins -- Sjoerd Mullender -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 369 bytes Desc: OpenPGP digital signature Url : http://mail.python.org/pipermail/python-checkins/attachments/20060104/83e8494e/attachment.pgp From anthony at interlink.com.au Wed Jan 4 16:38:49 2006 From: anthony at interlink.com.au (Anthony Baxter) Date: Thu, 5 Jan 2006 02:38:49 +1100 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <43BB9562.2070404@acm.org> References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> Message-ID: <200601050238.51766.anthony@interlink.com.au> On Wednesday 04 January 2006 20:29, Sjoerd Mullender wrote: > Neal Norwitz wrote: > > This change broke on Solaris 10. > > > > http://www.python.org/dev/buildbot/sparc%20solaris10%20gcc%20trun > >k/builds/48/step-compile/0 > > > > make: Fatal error: Command failed for target `buildno' > > The program "type" is not portable: it's a bash-ism. /usr/bin/type exists, at least on Solaris 9. It seems to work the way the Makefile expects. Anthony -- Anthony Baxter It's never too late to have a happy childhood. 
From python-checkins at python.org Wed Jan 4 18:55:52 2006 From: python-checkins at python.org (phillip.eby) Date: Wed, 4 Jan 2006 18:55:52 +0100 (CET) Subject: [Python-checkins] r41911 - sandbox/trunk/setuptools/ez_setup.py Message-ID: <20060104175552.B61761E4028@bag.python.org> Author: phillip.eby Date: Wed Jan 4 18:55:52 2006 New Revision: 41911 Modified: sandbox/trunk/setuptools/ez_setup.py Log: 0.6a9 release Modified: sandbox/trunk/setuptools/ez_setup.py ============================================================================== --- sandbox/trunk/setuptools/ez_setup.py (original) +++ sandbox/trunk/setuptools/ez_setup.py Wed Jan 4 18:55:52 2006 @@ -36,6 +36,8 @@ 'setuptools-0.6a7-py2.4.egg': 'c6d62dab4461f71aed943caea89e6f20', 'setuptools-0.6a8-py2.3.egg': '2f18eaaa3f544f5543ead4a68f3b2e1a', 'setuptools-0.6a8-py2.4.egg': '799018f2894f14c9f8bcb2b34e69b391', + 'setuptools-0.6a9-py2.3.egg': '8e438ad70438b07b0d8f82cae42b278f', + 'setuptools-0.6a9-py2.4.egg': '8f6e01fc12fb1cd006dc0d6c04327ec1', } import sys, os From barry at python.org Wed Jan 4 21:26:50 2006 From: barry at python.org (Barry Warsaw) Date: Wed, 04 Jan 2006 15:26:50 -0500 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <200601050238.51766.anthony@interlink.com.au> References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> <200601050238.51766.anthony@interlink.com.au> Message-ID: <1136406410.10342.36.camel@geddy.wooz.org> On Thu, 2006-01-05 at 02:38 +1100, Anthony Baxter wrote: > On Wednesday 04 January 2006 20:29, Sjoerd Mullender wrote: > > Neal Norwitz wrote: > > > This change broke on Solaris 10. > > > > > > http://www.python.org/dev/buildbot/sparc%20solaris10%20gcc%20trun > > >k/builds/48/step-compile/0 > > > > > > make: Fatal error: Command failed for target `buildno' > > > > The program "type" is not portable: it's a bash-ism. > > /usr/bin/type exists, at least on Solaris 9. > > It seems to work the way the Makefile expects. 
I actually tested this on Solaris 9 (and Gentoo and MacOS 10.4) before checking it in, and it worked for me. Verifying just now, I realized that's because I used GNU make instead of /usr/ccs/bin/make to do my test. If I use the latter, it breaks. GNU make works fine. Unfortunately, /usr/bin/type doesn't seem to accept the -t flag for me on Solaris 9. Okay, so what's the best (read: portable) way to do this? Maybe it's reasonable just to say that if you have a Subversion checkout of the tree, you better have svnversion on your $PATH? The SF bug in question is #1395926. -Barry -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 307 bytes Desc: This is a digitally signed message part Url : http://mail.python.org/pipermail/python-checkins/attachments/20060104/9b6094c4/attachment.pgp From python-checkins at python.org Wed Jan 4 22:04:24 2006 From: python-checkins at python.org (reinhold.birkenfeld) Date: Wed, 4 Jan 2006 22:04:24 +0100 (CET) Subject: [Python-checkins] r41912 - python/branches/release24-maint/Doc/lib/libcookielib.tex Message-ID: <20060104210424.CDBA91E4002@bag.python.org> Author: reinhold.birkenfeld Date: Wed Jan 4 22:04:24 2006 New Revision: 41912 Modified: python/branches/release24-maint/Doc/lib/libcookielib.tex Log: Bug #139571: wrong LaTeX label Modified: python/branches/release24-maint/Doc/lib/libcookielib.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libcookielib.tex (original) +++ python/branches/release24-maint/Doc/lib/libcookielib.tex Wed Jan 4 22:04:24 2006 @@ -560,7 +560,7 @@ \end{memberdesc} -\subsection{Cookie Objects \label{cookie-jar-objects}} +\subsection{Cookie Objects \label{cookie-objects}} \class{Cookie} instances have Python attributes roughly corresponding to the standard cookie-attributes specified in the various cookie From python-checkins at python.org Wed Jan 4 
22:05:07 2006 From: python-checkins at python.org (reinhold.birkenfeld) Date: Wed, 4 Jan 2006 22:05:07 +0100 (CET) Subject: [Python-checkins] r41913 - python/trunk/Doc/lib/libcookielib.tex Message-ID: <20060104210507.83A4C1E4002@bag.python.org> Author: reinhold.birkenfeld Date: Wed Jan 4 22:05:07 2006 New Revision: 41913 Modified: python/trunk/Doc/lib/libcookielib.tex Log: Bug #1395715: wrong LaTeX label Modified: python/trunk/Doc/lib/libcookielib.tex ============================================================================== --- python/trunk/Doc/lib/libcookielib.tex (original) +++ python/trunk/Doc/lib/libcookielib.tex Wed Jan 4 22:05:07 2006 @@ -584,7 +584,7 @@ \end{memberdesc} -\subsection{Cookie Objects \label{cookie-jar-objects}} +\subsection{Cookie Objects \label{cookie-objects}} \class{Cookie} instances have Python attributes roughly corresponding to the standard cookie-attributes specified in the various cookie From sjoerd at acm.org Wed Jan 4 22:57:34 2006 From: sjoerd at acm.org (Sjoerd Mullender) Date: Wed, 04 Jan 2006 22:57:34 +0100 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <1136406410.10342.36.camel@geddy.wooz.org> References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> <200601050238.51766.anthony@interlink.com.au> <1136406410.10342.36.camel@geddy.wooz.org> Message-ID: <43BC44CE.8070000@acm.org> Barry Warsaw wrote: > On Thu, 2006-01-05 at 02:38 +1100, Anthony Baxter wrote: > >>On Wednesday 04 January 2006 20:29, Sjoerd Mullender wrote: >> >>>Neal Norwitz wrote: >>> >>>>This change broke on Solaris 10. >>>> >>>>http://www.python.org/dev/buildbot/sparc%20solaris10%20gcc%20trun >>>>k/builds/48/step-compile/0 >>>> >>>>make: Fatal error: Command failed for target `buildno' >>> >>>The program "type" is not portable: it's a bash-ism. >> >>/usr/bin/type exists, at least on Solaris 9. >> >>It seems to work the way the Makefile expects. 
> > > I actually tested this on Solaris 9 (and Gentoo and MacOS 10.4) before > checking it in, and it worked for me. Verifying just now, I realized > that's because I used GNU make instead of /usr/ccs/bin/make to do my > test. If I use the latter, it breaks. GNU make works fine. > > Unfortunately, /usr/bin/type doesn't seem to accept the -t flag for me > on Solaris 9. Okay, so what's the best (read: portable) way to do this? > Maybe it's reasonable just to say that if you have a Subversion checkout > of the tree, you better have svnversion on your $PATH? > > The SF bug in question is #1395926. I assume the test is to see whether svnversion can be found in $PATH. The test could be rewritten as if test -d $(srcdir)/.svn && which svnversion >/dev/null 2>&1; then ... The program which is much more widely spread than type (it's much older). And if it doesn't exist, the test fails. Or you could just try the program with the fallback strategy: if test -d $(srcdir)/.svn && svnversion $(srcdir) > buildno 2>/dev/null then echo OK elif ... -- Sjoerd Mullender -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 369 bytes Desc: OpenPGP digital signature Url : http://mail.python.org/pipermail/python-checkins/attachments/20060104/02d85034/attachment.pgp From python-checkins at python.org Thu Jan 5 01:01:59 2006 From: python-checkins at python.org (phillip.eby) Date: Thu, 5 Jan 2006 01:01:59 +0100 (CET) Subject: [Python-checkins] r41914 - in sandbox/trunk/setuptools: ez_setup.py setup.py setuptools/__init__.py Message-ID: <20060105000159.7F8351E4002@bag.python.org> Author: phillip.eby Date: Thu Jan 5 01:01:57 2006 New Revision: 41914 Modified: sandbox/trunk/setuptools/ez_setup.py sandbox/trunk/setuptools/setup.py sandbox/trunk/setuptools/setuptools/__init__.py Log: Bump version number to start the 0.6a10dev cycle. 
Modified: sandbox/trunk/setuptools/ez_setup.py ============================================================================== --- sandbox/trunk/setuptools/ez_setup.py (original) +++ sandbox/trunk/setuptools/ez_setup.py Thu Jan 5 01:01:57 2006 @@ -14,7 +14,7 @@ This file can also be run as a script to install or upgrade setuptools. """ import sys -DEFAULT_VERSION = "0.6a9" +DEFAULT_VERSION = "0.6a10" DEFAULT_URL = "http://cheeseshop.python.org/packages/%s/s/setuptools/" % sys.version[:3] md5_data = { Modified: sandbox/trunk/setuptools/setup.py ============================================================================== --- sandbox/trunk/setuptools/setup.py (original) +++ sandbox/trunk/setuptools/setup.py Thu Jan 5 01:01:57 2006 @@ -19,7 +19,7 @@ d = {}; execfile(convert_path('setuptools/command/__init__.py'), d) SETUP_COMMANDS = d['__all__'] -VERSION = "0.6a9" +VERSION = "0.6a10" from setuptools import setup, find_packages import sys scripts = [] Modified: sandbox/trunk/setuptools/setuptools/__init__.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/__init__.py (original) +++ sandbox/trunk/setuptools/setuptools/__init__.py Thu Jan 5 01:01:57 2006 @@ -8,7 +8,7 @@ from distutils.util import convert_path import os.path -__version__ = '0.6a9' +__version__ = '0.6a10' __all__ = [ 'setup', 'Distribution', 'Feature', 'Command', 'Extension', 'Require', 'find_packages' From python-checkins at python.org Thu Jan 5 06:43:37 2006 From: python-checkins at python.org (neal.norwitz) Date: Thu, 5 Jan 2006 06:43:37 +0100 (CET) Subject: [Python-checkins] r41915 - in python/trunk: Misc/NEWS Modules/_bsddb.c Message-ID: <20060105054337.C24051E4002@bag.python.org> Author: neal.norwitz Date: Thu Jan 5 06:43:35 2006 New Revision: 41915 Modified: python/trunk/Misc/NEWS python/trunk/Modules/_bsddb.c Log: Fix errors on 64-bit platforms. 
Will backport Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Thu Jan 5 06:43:35 2006 @@ -209,6 +209,8 @@ Extension Modules ----------------- +- Fix 64-bit problems in bsddb. + - Patch #1365916: fix some unsafe 64-bit mmap methods. - Bug #1290333: Added a workaround for cjkcodecs' _codecs_cn build Modified: python/trunk/Modules/_bsddb.c ============================================================================== --- python/trunk/Modules/_bsddb.c (original) +++ python/trunk/Modules/_bsddb.c Thu Jan 5 06:43:35 2006 @@ -1522,7 +1522,7 @@ if (self->primaryDBType == DB_RECNO || self->primaryDBType == DB_QUEUE) - pkeyObj = PyInt_FromLong(*(long *)pkey.data); + pkeyObj = PyInt_FromLong(*(int *)pkey.data); else pkeyObj = PyString_FromStringAndSize(pkey.data, pkey.size); @@ -1531,7 +1531,7 @@ PyObject *keyObj; int type = _DB_get_type(self); if (type == DB_RECNO || type == DB_QUEUE) - keyObj = PyInt_FromLong(*(long *)key.data); + keyObj = PyInt_FromLong(*(int *)key.data); else keyObj = PyString_FromStringAndSize(key.data, key.size); retval = Py_BuildValue("OOO", keyObj, pkeyObj, dataObj); @@ -3172,7 +3172,7 @@ if (self->mydb->primaryDBType == DB_RECNO || self->mydb->primaryDBType == DB_QUEUE) - pkeyObj = PyInt_FromLong(*(long *)pkey.data); + pkeyObj = PyInt_FromLong(*(int *)pkey.data); else pkeyObj = PyString_FromStringAndSize(pkey.data, pkey.size); @@ -3181,7 +3181,7 @@ PyObject *keyObj; int type = _DB_get_type(self->mydb); if (type == DB_RECNO || type == DB_QUEUE) - keyObj = PyInt_FromLong(*(long *)key.data); + keyObj = PyInt_FromLong(*(int *)key.data); else keyObj = PyString_FromStringAndSize(key.data, key.size); retval = Py_BuildValue("OOO", keyObj, pkeyObj, dataObj); From python-checkins at python.org Thu Jan 5 06:45:14 2006 From: python-checkins at python.org (neal.norwitz) Date: Thu, 5 Jan 2006 06:45:14 +0100 (CET) Subject: 
[Python-checkins] r41916 - in python/branches/release24-maint: Misc/NEWS Modules/_bsddb.c Message-ID: <20060105054514.BB4E61E4002@bag.python.org> Author: neal.norwitz Date: Thu Jan 5 06:45:12 2006 New Revision: 41916 Modified: python/branches/release24-maint/Misc/NEWS python/branches/release24-maint/Modules/_bsddb.c Log: Backport: Fix errors on 64-bit platforms. (There are still some test problems Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Thu Jan 5 06:45:12 2006 @@ -38,6 +38,8 @@ Extension Modules ----------------- +- Fix 64-bit problems in bsddb. + - Bug #1290333: Added a workaround for cjkcodecs' _codecs_cn build problem on AIX. Modified: python/branches/release24-maint/Modules/_bsddb.c ============================================================================== --- python/branches/release24-maint/Modules/_bsddb.c (original) +++ python/branches/release24-maint/Modules/_bsddb.c Thu Jan 5 06:45:12 2006 @@ -1510,7 +1510,7 @@ if (self->primaryDBType == DB_RECNO || self->primaryDBType == DB_QUEUE) - pkeyObj = PyInt_FromLong(*(long *)pkey.data); + pkeyObj = PyInt_FromLong(*(int *)pkey.data); else pkeyObj = PyString_FromStringAndSize(pkey.data, pkey.size); @@ -1519,7 +1519,7 @@ PyObject *keyObj; int type = _DB_get_type(self); if (type == DB_RECNO || type == DB_QUEUE) - keyObj = PyInt_FromLong(*(long *)key.data); + keyObj = PyInt_FromLong(*(int *)key.data); else keyObj = PyString_FromStringAndSize(key.data, key.size); retval = Py_BuildValue("OOO", keyObj, pkeyObj, dataObj); @@ -2991,7 +2991,7 @@ if (self->mydb->primaryDBType == DB_RECNO || self->mydb->primaryDBType == DB_QUEUE) - pkeyObj = PyInt_FromLong(*(long *)pkey.data); + pkeyObj = PyInt_FromLong(*(int *)pkey.data); else pkeyObj = PyString_FromStringAndSize(pkey.data, pkey.size); @@ -3000,7 +3000,7 @@ PyObject *keyObj; int 
type = _DB_get_type(self->mydb); if (type == DB_RECNO || type == DB_QUEUE) - keyObj = PyInt_FromLong(*(long *)key.data); + keyObj = PyInt_FromLong(*(int *)key.data); else keyObj = PyString_FromStringAndSize(key.data, key.size); retval = Py_BuildValue("OOO", keyObj, pkeyObj, dataObj); From python-checkins at python.org Thu Jan 5 07:09:14 2006 From: python-checkins at python.org (neal.norwitz) Date: Thu, 5 Jan 2006 07:09:14 +0100 (CET) Subject: [Python-checkins] r41917 - python/trunk/Lib/test/test_curses.py Message-ID: <20060105060914.BBA3E1E4002@bag.python.org> Author: neal.norwitz Date: Thu Jan 5 07:09:13 2006 New Revision: 41917 Modified: python/trunk/Lib/test/test_curses.py Log: Skip test_curses if stdin is not a tty (like when run from cron or buildbot). Will backport. Modified: python/trunk/Lib/test/test_curses.py ============================================================================== --- python/trunk/Lib/test/test_curses.py (original) +++ python/trunk/Lib/test/test_curses.py Thu Jan 5 07:09:13 2006 @@ -8,7 +8,7 @@ # getmouse(), ungetmouse(), init_color() # -import curses, sys, tempfile +import curses, sys, tempfile, os # Optionally test curses module. 
This currently requires that the # 'curses' resource be given on the regrtest command line using the -u @@ -16,6 +16,8 @@ from test import test_support test_support.requires('curses') +if not os.isatty(sys.stdin.fileno()): + raise test_support.TestSkipped, "stdin is not a tty" def window_funcs(stdscr): "Test the methods of windows" From python-checkins at python.org Thu Jan 5 07:10:18 2006 From: python-checkins at python.org (neal.norwitz) Date: Thu, 5 Jan 2006 07:10:18 +0100 (CET) Subject: [Python-checkins] r41918 - python/branches/release24-maint/Lib/test/test_curses.py Message-ID: <20060105061018.568B51E4002@bag.python.org> Author: neal.norwitz Date: Thu Jan 5 07:10:16 2006 New Revision: 41918 Modified: python/branches/release24-maint/Lib/test/test_curses.py Log: Backport: Skip test_curses if stdin is not a tty (like when run from cron or buildbot). Modified: python/branches/release24-maint/Lib/test/test_curses.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_curses.py (original) +++ python/branches/release24-maint/Lib/test/test_curses.py Thu Jan 5 07:10:16 2006 @@ -8,7 +8,7 @@ # getmouse(), ungetmouse(), init_color() # -import curses, sys, tempfile +import curses, sys, tempfile, os # Optionally test curses module. 
This currently requires that the # 'curses' resource be given on the regrtest command line using the -u @@ -16,6 +16,8 @@ from test import test_support test_support.requires('curses') +if not os.isatty(sys.stdin.fileno()): + raise test_support.TestSkipped, "stdin is not a tty" def window_funcs(stdscr): "Test the methods of windows" From python-checkins at python.org Thu Jan 5 07:38:17 2006 From: python-checkins at python.org (neal.norwitz) Date: Thu, 5 Jan 2006 07:38:17 +0100 (CET) Subject: [Python-checkins] r41919 - python/trunk/Makefile.pre.in Message-ID: <20060105063817.7A55C1E400A@bag.python.org> Author: neal.norwitz Date: Thu Jan 5 07:38:15 2006 New Revision: 41919 Modified: python/trunk/Makefile.pre.in Log: Try to make svnversion test more portable, based on Sjoerd's suggestion Modified: python/trunk/Makefile.pre.in ============================================================================== --- python/trunk/Makefile.pre.in (original) +++ python/trunk/Makefile.pre.in Thu Jan 5 07:38:15 2006 @@ -349,7 +349,7 @@ $(SIGNAL_OBJS) \ $(MODOBJS) \ $(srcdir)/Modules/getbuildinfo.c - if test -d $(srcdir)/.svn -a ! -z "`type -t svnversion`" ; then \ + if test -d $(srcdir)/.svn -a "`which svnversion 2> /dev/null`"; then \ svnversion $(srcdir) >buildno; \ elif test -f buildno; then \ expr `cat buildno` + 1 >buildno1; \ From python-checkins at python.org Thu Jan 5 08:16:14 2006 From: python-checkins at python.org (neal.norwitz) Date: Thu, 5 Jan 2006 08:16:14 +0100 (CET) Subject: [Python-checkins] r41920 - python/trunk/Lib/test/test_linuxaudiodev.py python/trunk/Lib/test/test_ossaudiodev.py Message-ID: <20060105071614.BA7BB1E4002@bag.python.org> Author: neal.norwitz Date: Thu Jan 5 08:16:13 2006 New Revision: 41920 Modified: python/trunk/Lib/test/test_linuxaudiodev.py python/trunk/Lib/test/test_ossaudiodev.py Log: If the audio file does not exist, the test should be skipped. Will backport. 
Modified: python/trunk/Lib/test/test_linuxaudiodev.py ============================================================================== --- python/trunk/Lib/test/test_linuxaudiodev.py (original) +++ python/trunk/Lib/test/test_linuxaudiodev.py Thu Jan 5 08:16:13 2006 @@ -28,7 +28,7 @@ try: a = linuxaudiodev.open('w') except linuxaudiodev.error, msg: - if msg[0] in (errno.EACCES, errno.ENODEV, errno.EBUSY): + if msg[0] in (errno.EACCES, errno.ENOENT, errno.ENODEV, errno.EBUSY): raise TestSkipped, msg raise TestFailed, msg Modified: python/trunk/Lib/test/test_ossaudiodev.py ============================================================================== --- python/trunk/Lib/test/test_ossaudiodev.py (original) +++ python/trunk/Lib/test/test_ossaudiodev.py Thu Jan 5 08:16:13 2006 @@ -45,7 +45,7 @@ try: dsp = ossaudiodev.open('w') except IOError, msg: - if msg[0] in (errno.EACCES, errno.ENODEV, errno.EBUSY): + if msg[0] in (errno.EACCES, errno.ENOENT, errno.ENODEV, errno.EBUSY): raise TestSkipped, msg raise TestFailed, msg From python-checkins at python.org Thu Jan 5 08:17:39 2006 From: python-checkins at python.org (neal.norwitz) Date: Thu, 5 Jan 2006 08:17:39 +0100 (CET) Subject: [Python-checkins] r41921 - python/branches/release24-maint/Lib/test/test_linuxaudiodev.py python/branches/release24-maint/Lib/test/test_ossaudiodev.py Message-ID: <20060105071739.864851E4002@bag.python.org> Author: neal.norwitz Date: Thu Jan 5 08:17:35 2006 New Revision: 41921 Modified: python/branches/release24-maint/Lib/test/test_linuxaudiodev.py python/branches/release24-maint/Lib/test/test_ossaudiodev.py Log: Backport: If the audio file does not exist, the test should be skipped. 
Modified: python/branches/release24-maint/Lib/test/test_linuxaudiodev.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_linuxaudiodev.py (original) +++ python/branches/release24-maint/Lib/test/test_linuxaudiodev.py Thu Jan 5 08:17:35 2006 @@ -28,7 +28,7 @@ try: a = linuxaudiodev.open('w') except linuxaudiodev.error, msg: - if msg[0] in (errno.EACCES, errno.ENODEV, errno.EBUSY): + if msg[0] in (errno.EACCES, errno.ENOENT, errno.ENODEV, errno.EBUSY): raise TestSkipped, msg raise TestFailed, msg Modified: python/branches/release24-maint/Lib/test/test_ossaudiodev.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_ossaudiodev.py (original) +++ python/branches/release24-maint/Lib/test/test_ossaudiodev.py Thu Jan 5 08:17:35 2006 @@ -45,7 +45,7 @@ try: dsp = ossaudiodev.open('w') except IOError, msg: - if msg[0] in (errno.EACCES, errno.ENODEV, errno.EBUSY): + if msg[0] in (errno.EACCES, errno.ENOENT, errno.ENODEV, errno.EBUSY): raise TestSkipped, msg raise TestFailed, msg From python-checkins at python.org Thu Jan 5 09:00:58 2006 From: python-checkins at python.org (neal.norwitz) Date: Thu, 5 Jan 2006 09:00:58 +0100 (CET) Subject: [Python-checkins] r41922 - in python/branches/release24-maint: Misc/NEWS Modules/_bsddb.c Message-ID: <20060105080058.6E34B1E4022@bag.python.org> Author: neal.norwitz Date: Thu Jan 5 09:00:55 2006 New Revision: 41922 Modified: python/branches/release24-maint/Misc/NEWS python/branches/release24-maint/Modules/_bsddb.c Log: Backport 38951: fixes pybsddb SF bug id 1215432. DB.associate() would crash when a DBError was supposed to be raised. 
Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Thu Jan 5 09:00:55 2006 @@ -38,6 +38,9 @@ Extension Modules ----------------- +- Bug #1215432: in bsddb DB.associate() would crash when a DBError + was supposed to be raised. + - Fix 64-bit problems in bsddb. - Bug #1290333: Added a workaround for cjkcodecs' _codecs_cn build Modified: python/branches/release24-maint/Modules/_bsddb.c ============================================================================== --- python/branches/release24-maint/Modules/_bsddb.c (original) +++ python/branches/release24-maint/Modules/_bsddb.c Thu Jan 5 09:00:55 2006 @@ -1174,9 +1174,7 @@ } /* Save a reference to the callback in the secondary DB. */ - if (self->associateCallback != NULL) { - Py_DECREF(self->associateCallback); - } + Py_XDECREF(secondaryDB->associateCallback); Py_INCREF(callback); secondaryDB->associateCallback = callback; secondaryDB->primaryDBType = _DB_get_type(self); @@ -1210,8 +1208,8 @@ MYDB_END_ALLOW_THREADS; if (err) { - Py_DECREF(self->associateCallback); - self->associateCallback = NULL; + Py_XDECREF(secondaryDB->associateCallback); + secondaryDB->associateCallback = NULL; secondaryDB->primaryDBType = 0; } From nnorwitz at gmail.com Thu Jan 5 09:33:45 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Thu, 5 Jan 2006 00:33:45 -0800 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <43BC44CE.8070000@acm.org> References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> <200601050238.51766.anthony@interlink.com.au> <1136406410.10342.36.camel@geddy.wooz.org> <43BC44CE.8070000@acm.org> Message-ID: On 1/4/06, Sjoerd Mullender wrote: > > I assume the test is to see whether svnversion can be found in $PATH. 
> The test could be rewritten as > > if test -d $(srcdir)/.svn && which svnversion >/dev/null 2>&1; then ... I did something like this. It works on Solaris 10, but Skip's OSX box is still not happy. I'm not sure what I checked in is any more portable though. n From sjoerd at acm.org Thu Jan 5 10:07:27 2006 From: sjoerd at acm.org (Sjoerd Mullender) Date: Thu, 05 Jan 2006 10:07:27 +0100 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> <200601050238.51766.anthony@interlink.com.au> <1136406410.10342.36.camel@geddy.wooz.org> <43BC44CE.8070000@acm.org> Message-ID: <43BCE1CF.3070603@acm.org> Neal Norwitz wrote: > On 1/4/06, Sjoerd Mullender wrote: > >>I assume the test is to see whether svnversion can be found in $PATH. >>The test could be rewritten as >> >>if test -d $(srcdir)/.svn && which svnversion >/dev/null 2>&1; then ... > > > I did something like this. It works on Solaris 10, but Skip's OSX box > is still not happy. I'm not sure what I checked in is any more > portable though. > > n Come to think of it, could the shell be the problem? make normally tells the shell to exit after an error (-e flag). More modern shells don't take this to mean to exit if a program inside the test of an if or while fails, but if I remember right, older shells (way old) did exit. In other words, sh -ec 'if false; then echo no; else echo yes; fi' should produce the text "yes", but if it doesn't, then the problem is in the shell. In contrast, sh -ec 'false; echo yes' should not produce "yes". If this is the problem, the solution is to add an "-" to the start of the command in the Makefile. -- Sjoerd Mullender -------------- next part -------------- A non-text attachment was scrubbed... 
Name: signature.asc Type: application/pgp-signature Size: 369 bytes Desc: OpenPGP digital signature Url : http://mail.python.org/pipermail/python-checkins/attachments/20060105/ead95165/attachment.pgp From martin at v.loewis.de Thu Jan 5 10:23:00 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Thu, 05 Jan 2006 10:23:00 +0100 Subject: [Python-checkins] r41919 - python/trunk/Makefile.pre.in In-Reply-To: <20060105063817.7A55C1E400A@bag.python.org> References: <20060105063817.7A55C1E400A@bag.python.org> Message-ID: <43BCE574.1000705@v.loewis.de> neal.norwitz wrote: > Try to make svnversion test more portable, based on Sjoerd's suggestion That apparently doesn't work: the OSX buildbot does not have svnversion in its path (yet), still "which" succeeds. Regards, Martin From python-checkins at python.org Thu Jan 5 11:00:44 2006 From: python-checkins at python.org (martin.v.loewis) Date: Thu, 5 Jan 2006 11:00:44 +0100 (CET) Subject: [Python-checkins] r41923 - in python/trunk: Makefile.pre.in Modules/getbuildinfo.c Objects/unicodeobject.c configure configure.in Message-ID: <20060105100044.207141E4017@bag.python.org> Author: martin.v.loewis Date: Thu Jan 5 11:00:36 2006 New Revision: 41923 Modified: python/trunk/Makefile.pre.in python/trunk/Modules/getbuildinfo.c (contents, props changed) python/trunk/Objects/unicodeobject.c python/trunk/configure python/trunk/configure.in Log: Stop maintaining the buildno file. 
Modified: python/trunk/Makefile.pre.in ============================================================================== --- python/trunk/Makefile.pre.in (original) +++ python/trunk/Makefile.pre.in Thu Jan 5 11:00:36 2006 @@ -33,6 +33,7 @@ LINKCC= @LINKCC@ AR= @AR@ RANLIB= @RANLIB@ +SVNVERSION= @SVNVERSION@ # Shell used by make (some versions default to the login shell, which is bad) SHELL= /bin/sh @@ -341,21 +342,6 @@ *) $(RUNSHARED) CC='$(CC)' LDSHARED='$(BLDSHARED)' OPT='$(OPT)' ./$(BUILDPYTHON) -E $(srcdir)/setup.py build;; \ esac -# buildno should really depend on something like LIBRARY_SRC -buildno: $(PARSER_OBJS) \ - $(OBJECT_OBJS) \ - $(PYTHON_OBJS) \ - $(MODULE_OBJS) \ - $(SIGNAL_OBJS) \ - $(MODOBJS) \ - $(srcdir)/Modules/getbuildinfo.c - if test -d $(srcdir)/.svn -a "`which svnversion 2> /dev/null`"; then \ - svnversion $(srcdir) >buildno; \ - elif test -f buildno; then \ - expr `cat buildno` + 1 >buildno1; \ - mv -f buildno1 buildno; \ - else echo 1 >buildno; fi - # Build static library # avoid long command lines, same as LIBRARY_OBJS $(LIBRARY): $(LIBRARY_OBJS) @@ -445,8 +431,14 @@ ############################################################################ # Special rules for object files -Modules/getbuildinfo.o: $(srcdir)/Modules/getbuildinfo.c buildno - $(CC) -c $(PY_CFLAGS) -DBUILD=\"`cat buildno`\" -o $@ $(srcdir)/Modules/getbuildinfo.c +Modules/getbuildinfo.o: $(PARSER_OBJS) \ + $(OBJECT_OBJS) \ + $(PYTHON_OBJS) \ + $(MODULE_OBJS) \ + $(SIGNAL_OBJS) \ + $(MODOBJS) \ + $(srcdir)/Modules/getbuildinfo.c + $(CC) -c $(PY_CFLAGS) -DSVNVERSION=\"`LANG=C $(SVNVERSION) $(srcdir)`\" -o $@ $(srcdir)/Modules/getbuildinfo.c Modules/getpath.o: $(srcdir)/Modules/getpath.c Makefile $(CC) -c $(PY_CFLAGS) -DPYTHONPATH='"$(PYTHONPATH)"' \ @@ -990,7 +982,7 @@ # remove all generated files, even Makefile[.pre] # Keep configure and Python-ast.[ch], it's possible they can't be generated distclean: clobber - -rm -f core Makefile Makefile.pre buildno config.status \ + -rm -f 
core Makefile Makefile.pre config.status \ Modules/Setup Modules/Setup.local Modules/Setup.config find $(srcdir) '(' -name '*.fdc' -o -name '*~' \ -o -name '[@,#]*' -o -name '*.old' \ Modified: python/trunk/Modules/getbuildinfo.c ============================================================================== --- python/trunk/Modules/getbuildinfo.c (original) +++ python/trunk/Modules/getbuildinfo.c Thu Jan 5 11:00:36 2006 @@ -20,21 +20,30 @@ #endif #endif -#ifndef BUILD -#define BUILD "0" -#endif +static const char revision[] = "$Revision$"; +static const char headurl[] = "$HeadURL$"; const char * Py_GetBuildInfo(void) { static char buildinfo[50]; +#ifdef SVNVERSION + static char svnversion[] = SVNVERSION; +#else + static char svnversion[20] = "unknown"; + if (strstr(headurl, "/tags/") != NULL) { + int start = ; + strncpy(svnversion, revision+start, stop-start); + svnversion[stop-start] = '\0'; + } +#endif PyOS_snprintf(buildinfo, sizeof(buildinfo), - "%s, %.20s, %.9s", BUILD, DATE, TIME); + "%s, %.20s, %.9s", svnversion, DATE, TIME); return buildinfo; } const char * Py_GetBuildNumber(void) { - return BUILD; + return "0"; } Modified: python/trunk/Objects/unicodeobject.c ============================================================================== --- python/trunk/Objects/unicodeobject.c (original) +++ python/trunk/Objects/unicodeobject.c Thu Jan 5 11:00:36 2006 @@ -5357,7 +5357,7 @@ return PyBool_FromLong(Py_UNICODE_ISLOWER(*p)); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5391,7 +5391,7 @@ return PyBool_FromLong(Py_UNICODE_ISUPPER(*p) != 0); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5428,7 +5428,7 @@ (Py_UNICODE_ISUPPER(*p) != 0)); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + 
if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5473,7 +5473,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5502,7 +5502,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5531,7 +5531,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5560,7 +5560,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5589,7 +5589,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5618,7 +5618,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -6453,14 +6453,14 @@ if (PyInt_Check(item)) { long i = PyInt_AS_LONG(item); if (i < 0) - i += PyString_GET_SIZE(self); + i += PyUnicode_GET_SIZE(self); return unicode_getitem(self, i); } else if (PyLong_Check(item)) { long i = PyLong_AsLong(item); if (i == -1 && PyErr_Occurred()) return NULL; if (i < 0) - i += PyString_GET_SIZE(self); + i += PyUnicode_GET_SIZE(self); return unicode_getitem(self, i); } else if (PySlice_Check(item)) { int start, stop, step, slicelength, cur, i; @@ -6468,7 +6468,7 @@ Py_UNICODE* result_buf; PyObject* result; - if 
(PySlice_GetIndicesEx((PySliceObject*)item, PyString_GET_SIZE(self), + if (PySlice_GetIndicesEx((PySliceObject*)item, PyUnicode_GET_SIZE(self), &start, &stop, &step, &slicelength) < 0) { return NULL; } @@ -6478,6 +6478,9 @@ } else { source_buf = PyUnicode_AS_UNICODE((PyObject*)self); result_buf = PyMem_MALLOC(slicelength*sizeof(Py_UNICODE)); + + if (result_buf == NULL) + return PyErr_NoMemory(); for (cur = start, i = 0; i < slicelength; cur += step, i++) { result_buf[i] = source_buf[cur]; Modified: python/trunk/configure ============================================================================== --- python/trunk/configure (original) +++ python/trunk/configure Thu Jan 5 11:00:36 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41764 . +# From configure.in Revision: 41852 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.5. # @@ -312,7 +312,7 @@ # include #endif" -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS VERSION SOVERSION CONFIG_ARGS PYTHONFRAMEWORK PYTHONFRAMEWORKDIR PYTHONFRAMEWORKPREFIX PYTHONFRAMEWORKINSTALLDIR MACHDEP SGI_ABI EXTRAPLATDIR EXTRAMACHDEPPATH CONFIGURE_MACOSX_DEPLOYMENT_TARGET CXX MAINOBJ EXEEXT CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC OBJEXT CPP EGREP BUILDEXEEXT LIBRARY LDLIBRARY DLLLIBRARY BLDLIBRARY LDLIBRARYDIR INSTSONAME RUNSHARED LINKCC RANLIB ac_ct_RANLIB AR INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN OPT BASECFLAGS OTHER_LIBTOOL_OPT LIBTOOL_CRUFT SO LDSHARED BLDSHARED CCSHARED LINKFORSHARED CFLAGSFORSHARED SHLIBS USE_SIGNAL_MODULE SIGNAL_OBJS USE_THREAD_MODULE LDLAST THREADOBJ DLINCLDIR DYNLOADFILE MACHDEP_OBJS TRUE LIBOBJS HAVE_GETHOSTBYNAME_R_6_ARG 
HAVE_GETHOSTBYNAME_R_5_ARG HAVE_GETHOSTBYNAME_R_3_ARG HAVE_GETHOSTBYNAME_R HAVE_GETHOSTBYNAME LIBM LIBC UNICODE_OBJS THREADHEADERS SRCDIRS LTLIBOBJS' +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS VERSION SOVERSION CONFIG_ARGS PYTHONFRAMEWORK PYTHONFRAMEWORKDIR PYTHONFRAMEWORKPREFIX PYTHONFRAMEWORKINSTALLDIR MACHDEP SGI_ABI EXTRAPLATDIR EXTRAMACHDEPPATH CONFIGURE_MACOSX_DEPLOYMENT_TARGET CXX MAINOBJ EXEEXT CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC OBJEXT CPP EGREP BUILDEXEEXT LIBRARY LDLIBRARY DLLLIBRARY BLDLIBRARY LDLIBRARYDIR INSTSONAME RUNSHARED LINKCC RANLIB ac_ct_RANLIB AR SVNVERSION INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN OPT BASECFLAGS OTHER_LIBTOOL_OPT LIBTOOL_CRUFT SO LDSHARED BLDSHARED CCSHARED LINKFORSHARED CFLAGSFORSHARED SHLIBS USE_SIGNAL_MODULE SIGNAL_OBJS USE_THREAD_MODULE LDLAST THREADOBJ DLINCLDIR DYNLOADFILE MACHDEP_OBJS TRUE LIBOBJS HAVE_GETHOSTBYNAME_R_6_ARG HAVE_GETHOSTBYNAME_R_5_ARG HAVE_GETHOSTBYNAME_R_3_ARG HAVE_GETHOSTBYNAME_R HAVE_GETHOSTBYNAME LIBM LIBC UNICODE_OBJS THREADHEADERS SRCDIRS LTLIBOBJS' ac_subst_files='' # Initialize some variables set by options. @@ -3575,6 +3575,49 @@ test -n "$AR" || AR="ar" + +for ac_prog in svnversion +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_SVNVERSION+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$SVNVERSION"; then + ac_cv_prog_SVNVERSION="$SVNVERSION" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_SVNVERSION="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +SVNVERSION=$ac_cv_prog_SVNVERSION +if test -n "$SVNVERSION"; then + echo "$as_me:$LINENO: result: $SVNVERSION" >&5 +echo "${ECHO_T}$SVNVERSION" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$SVNVERSION" && break +done +test -n "$SVNVERSION" || SVNVERSION="echo no svnversion" + + case $MACHDEP in bsdos*|hp*|HP*) # install -d does not work on BSDI or HP-UX @@ -21477,6 +21520,7 @@ s,@RANLIB@,$RANLIB,;t t s,@ac_ct_RANLIB@,$ac_ct_RANLIB,;t t s,@AR@,$AR,;t t +s,@SVNVERSION@,$SVNVERSION,;t t s,@INSTALL_PROGRAM@,$INSTALL_PROGRAM,;t t s,@INSTALL_SCRIPT@,$INSTALL_SCRIPT,;t t s,@INSTALL_DATA@,$INSTALL_DATA,;t t Modified: python/trunk/configure.in ============================================================================== --- python/trunk/configure.in (original) +++ python/trunk/configure.in Thu Jan 5 11:00:36 2006 @@ -618,6 +618,9 @@ AC_SUBST(AR) AC_CHECK_PROGS(AR, ar aal, ar) +AC_SUBST(SVNVERSION) +AC_CHECK_PROGS(SVNVERSION, svnversion, [echo no svnversion]) + case $MACHDEP in bsdos*|hp*|HP*) # install -d does not work on BSDI or HP-UX From mal at egenix.com Thu Jan 5 11:31:40 2006 From: mal at egenix.com (M.-A.
Lemburg) Date: Thu, 05 Jan 2006 11:31:40 +0100 Subject: [Python-checkins] r41923 - in python/trunk: Makefile.pre.in Modules/getbuildinfo.c Objects/unicodeobject.c configure configure.in In-Reply-To: <20060105100044.207141E4017@bag.python.org> References: <20060105100044.207141E4017@bag.python.org> Message-ID: <43BCF58C.2040307@egenix.com> martin.v.loewis wrote: > Author: martin.v.loewis > Date: Thu Jan 5 11:00:36 2006 > New Revision: 41923 > > Modified: > python/trunk/Makefile.pre.in > python/trunk/Modules/getbuildinfo.c (contents, props changed) > python/trunk/Objects/unicodeobject.c > python/trunk/configure > python/trunk/configure.in > Log: > Stop maintaining the buildno file. This checkin does not seem to have anything to do with the buildno... Nevertheless, it's still fixing a potential bug :-) Looks like a copy&paste error. > Modified: python/trunk/Objects/unicodeobject.c > ============================================================================== > --- python/trunk/Objects/unicodeobject.c (original) > +++ python/trunk/Objects/unicodeobject.c Thu Jan 5 11:00:36 2006 > @@ -5357,7 +5357,7 @@ > return PyBool_FromLong(Py_UNICODE_ISLOWER(*p)); > > /* Special case for empty strings */ > - if (PyString_GET_SIZE(self) == 0) > + if (PyUnicode_GET_SIZE(self) == 0) > return PyBool_FromLong(0); > > e = p + PyUnicode_GET_SIZE(self); > @@ -5391,7 +5391,7 @@ > return PyBool_FromLong(Py_UNICODE_ISUPPER(*p) != 0); > > /* Special case for empty strings */ > - if (PyString_GET_SIZE(self) == 0) > + if (PyUnicode_GET_SIZE(self) == 0) > return PyBool_FromLong(0); > > e = p + PyUnicode_GET_SIZE(self); > @@ -5428,7 +5428,7 @@ > (Py_UNICODE_ISUPPER(*p) != 0)); > > /* Special case for empty strings */ > - if (PyString_GET_SIZE(self) == 0) > + if (PyUnicode_GET_SIZE(self) == 0) > return PyBool_FromLong(0); > > e = p + PyUnicode_GET_SIZE(self); > @@ -5473,7 +5473,7 @@ > return PyBool_FromLong(1); > > /* Special case for empty strings */ > - if (PyString_GET_SIZE(self) == 0) > + if
(PyUnicode_GET_SIZE(self) == 0) > return PyBool_FromLong(0); > > e = p + PyUnicode_GET_SIZE(self); > @@ -5502,7 +5502,7 @@ > return PyBool_FromLong(1); > > /* Special case for empty strings */ > - if (PyString_GET_SIZE(self) == 0) > + if (PyUnicode_GET_SIZE(self) == 0) > return PyBool_FromLong(0); > > e = p + PyUnicode_GET_SIZE(self); > @@ -5531,7 +5531,7 @@ > return PyBool_FromLong(1); > > /* Special case for empty strings */ > - if (PyString_GET_SIZE(self) == 0) > + if (PyUnicode_GET_SIZE(self) == 0) > return PyBool_FromLong(0); > > e = p + PyUnicode_GET_SIZE(self); > @@ -5560,7 +5560,7 @@ > return PyBool_FromLong(1); > > /* Special case for empty strings */ > - if (PyString_GET_SIZE(self) == 0) > + if (PyUnicode_GET_SIZE(self) == 0) > return PyBool_FromLong(0); > > e = p + PyUnicode_GET_SIZE(self); > @@ -5589,7 +5589,7 @@ > return PyBool_FromLong(1); > > /* Special case for empty strings */ > - if (PyString_GET_SIZE(self) == 0) > + if (PyUnicode_GET_SIZE(self) == 0) > return PyBool_FromLong(0); > > e = p + PyUnicode_GET_SIZE(self); > @@ -5618,7 +5618,7 @@ > return PyBool_FromLong(1); > > /* Special case for empty strings */ > - if (PyString_GET_SIZE(self) == 0) > + if (PyUnicode_GET_SIZE(self) == 0) > return PyBool_FromLong(0); > > e = p + PyUnicode_GET_SIZE(self); > @@ -6453,14 +6453,14 @@ > if (PyInt_Check(item)) { > long i = PyInt_AS_LONG(item); > if (i < 0) > - i += PyString_GET_SIZE(self); > + i += PyUnicode_GET_SIZE(self); > return unicode_getitem(self, i); > } else if (PyLong_Check(item)) { > long i = PyLong_AsLong(item); > if (i == -1 && PyErr_Occurred()) > return NULL; > if (i < 0) > - i += PyString_GET_SIZE(self); > + i += PyUnicode_GET_SIZE(self); > return unicode_getitem(self, i); > } else if (PySlice_Check(item)) { > int start, stop, step, slicelength, cur, i; > @@ -6468,7 +6468,7 @@ > Py_UNICODE* result_buf; > PyObject* result; > > - if (PySlice_GetIndicesEx((PySliceObject*)item, PyString_GET_SIZE(self), > + if 
(PySlice_GetIndicesEx((PySliceObject*)item, PyUnicode_GET_SIZE(self), > &start, &stop, &step, &slicelength) < 0) { > return NULL; > } > @@ -6478,6 +6478,9 @@ > } else { > source_buf = PyUnicode_AS_UNICODE((PyObject*)self); > result_buf = PyMem_MALLOC(slicelength*sizeof(Py_UNICODE)); > + > + if (result_buf == NULL) > + return PyErr_NoMemory(); > > for (cur = start, i = 0; i < slicelength; cur += step, i++) { > result_buf[i] = source_buf[cur]; > > Modified: python/trunk/configure > ============================================================================== > --- python/trunk/configure (original) > +++ python/trunk/configure Thu Jan 5 11:00:36 2006 > @@ -1,5 +1,5 @@ > #! /bin/sh > -# From configure.in Revision: 41764 . > +# From configure.in Revision: 41852 . > # Guess values for system-dependent variables and create Makefiles. > # Generated by GNU Autoconf 2.59 for python 2.5. > # > @@ -312,7 +312,7 @@ > # include > #endif" > > -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS VERSION SOVERSION CONFIG_ARGS PYTHONFRAMEWORK PYTHONFRAMEWORKDIR PYTHONFRAMEWORKPREFIX PYTHONFRAMEWORKINSTALLDIR MACHDEP SGI_ABI EXTRAPLATDIR EXTRAMACHDEPPATH CONFIGURE_MACOSX_DEPLOYMENT_TARGET CXX MAINOBJ EXEEXT CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC OBJEXT CPP EGREP BUILDEXEEXT LIBRARY LDLIBRARY DLLLIBRARY BLDLIBRARY LDLIBRARYDIR INSTSONAME RUNSHARED LINKCC RANLIB ac_ct_RANLIB AR INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN OPT BASECFLAGS OTHER_LIBTOOL_OPT LIBTOOL_CRUFT SO LDSHARED BLDSHARED CCSHARED LINKFORSHARED CFLAGSFORSHARED SHLIBS USE_SIGNAL_MODULE SIGNAL_OBJS USE_THREAD_MODULE LDLAST THREADOBJ DLINCLDIR DYNLOADFILE MACHDEP_OBJS
TRUE LIBOBJS HAVE_GETHOSTBYNAME_R_6_ARG HAVE_GETHOSTBYNAME_R_5_ARG HAVE_GETHOSTBYNAME_R_3_ARG HAVE_GETHOSTBYNAME_R HAVE_GETHOSTBYNAME LIBM LIBC UNICODE_OBJS THREADHEADERS SRCDIRS LTLIBOBJS' > +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS VERSION SOVERSION CONFIG_ARGS PYTHONFRAMEWORK PYTHONFRAMEWORKDIR PYTHONFRAMEWORKPREFIX PYTHONFRAMEWORKINSTALLDIR MACHDEP SGI_ABI EXTRAPLATDIR EXTRAMACHDEPPATH CONFIGURE_MACOSX_DEPLOYMENT_TARGET CXX MAINOBJ EXEEXT CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC OBJEXT CPP EGREP BUILDEXEEXT LIBRARY LDLIBRARY DLLLIBRARY BLDLIBRARY LDLIBRARYDIR INSTSONAME RUNSHARED LINKCC RANLIB ac_ct_RANLIB AR SVNVERSION INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN OPT BASECFLAGS OTHER_LIBTOOL_OPT LIBTOOL_CRUFT SO LDSHARED BLDSHARED CCSHARED LINKFORSHARED CFLAGSFORSHARED SHLIBS USE_SIGNAL_MODULE SIGNAL_OBJS USE_THREAD_MODULE LDLAST THREADOBJ DLINCLDIR DYNLOADFILE MACHDEP_OBJS TRUE LIBOBJS HAVE_GETHOSTBYNAME_R_6_ARG HAVE_GETHOSTBYNAME_R_5_ARG HAVE_GETHOSTBYNAME_R_3_ARG HAVE_GETHOSTBYNAME_R HAVE_GETHOSTBYNAME LIBM LIBC UNICODE_OBJS THREADHEADERS SRCDIRS LTLIBOBJS' > ac_subst_files='' > > # Initialize some variables set by options. > @@ -3575,6 +3575,49 @@ > test -n "$AR" || AR="ar" > > > + > +for ac_prog in svnversion > +do > + # Extract the first word of "$ac_prog", so it can be a program name with args. > +set dummy $ac_prog; ac_word=$2 > +echo "$as_me:$LINENO: checking for $ac_word" >&5 > +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 > +if test "${ac_cv_prog_SVNVERSION+set}" = set; then > + echo $ECHO_N "(cached) $ECHO_C" >&6 > +else > + if test -n "$SVNVERSION"; then > + ac_cv_prog_SVNVERSION="$SVNVERSION" # Let the user override the test.
> +else > +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR > +for as_dir in $PATH > +do > + IFS=$as_save_IFS > + test -z "$as_dir" && as_dir=. > + for ac_exec_ext in '' $ac_executable_extensions; do > + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then > + ac_cv_prog_SVNVERSION="$ac_prog" > + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 > + break 2 > + fi > +done > +done > + > +fi > +fi > +SVNVERSION=$ac_cv_prog_SVNVERSION > +if test -n "$SVNVERSION"; then > + echo "$as_me:$LINENO: result: $SVNVERSION" >&5 > +echo "${ECHO_T}$SVNVERSION" >&6 > +else > + echo "$as_me:$LINENO: result: no" >&5 > +echo "${ECHO_T}no" >&6 > +fi > + > + test -n "$SVNVERSION" && break > +done > +test -n "$SVNVERSION" || SVNVERSION="echo no svnversion" > + > + > case $MACHDEP in > bsdos*|hp*|HP*) > # install -d does not work on BSDI or HP-UX > @@ -21477,6 +21520,7 @@ > s, at RANLIB@,$RANLIB,;t t > s, at ac_ct_RANLIB@,$ac_ct_RANLIB,;t t > s, at AR@,$AR,;t t > +s, at SVNVERSION@,$SVNVERSION,;t t > s, at INSTALL_PROGRAM@,$INSTALL_PROGRAM,;t t > s, at INSTALL_SCRIPT@,$INSTALL_SCRIPT,;t t > s, at INSTALL_DATA@,$INSTALL_DATA,;t t > > Modified: python/trunk/configure.in > ============================================================================== > --- python/trunk/configure.in (original) > +++ python/trunk/configure.in Thu Jan 5 11:00:36 2006 > @@ -618,6 +618,9 @@ > AC_SUBST(AR) > AC_CHECK_PROGS(AR, ar aal, ar) > > +AC_SUBST(SVNVERSION) > +AC_CHECK_PROGS(SVNVERSION, svnversion, [echo no svnversion]) > + > case $MACHDEP in > bsdos*|hp*|HP*) > # install -d does not work on BSDI or HP-UX > _______________________________________________ > Python-checkins mailing list > Python-checkins at python.org > http://mail.python.org/mailman/listinfo/python-checkins -- Marc-Andre Lemburg eGenix.com Professional Python Services directly from the Source (#1, Jan 05 2006) >>> Python/Zope Consulting and Support ... http://www.egenix.com/ >>> mxODBC.Zope.Database.Adapter ... 
http://zope.egenix.com/ >>> mxODBC, mxDateTime, mxTextTools ... http://python.egenix.com/ ________________________________________________________________________ ::: Try mxODBC.Zope.DA for Windows,Linux,Solaris,FreeBSD for free ! :::: From python-checkins at python.org Thu Jan 5 11:34:02 2006 From: python-checkins at python.org (martin.v.loewis) Date: Thu, 5 Jan 2006 11:34:02 +0100 (CET) Subject: [Python-checkins] r41924 - in python/trunk: Makefile.pre.in Modules/getbuildinfo.c configure configure.in Message-ID: <20060105103402.05E061E4007@bag.python.org> Author: martin.v.loewis Date: Thu Jan 5 11:33:59 2006 New Revision: 41924 Modified: python/trunk/Makefile.pre.in python/trunk/Modules/getbuildinfo.c python/trunk/configure python/trunk/configure.in Log: Always set SVNVERSION to "exported" if it otherwise fails. Modified: python/trunk/Makefile.pre.in ============================================================================== --- python/trunk/Makefile.pre.in (original) +++ python/trunk/Makefile.pre.in Thu Jan 5 11:33:59 2006 @@ -438,7 +438,7 @@ $(SIGNAL_OBJS) \ $(MODOBJS) \ $(srcdir)/Modules/getbuildinfo.c - $(CC) -c $(PY_CFLAGS) -DSVNVERSION=\"`LANG=C $(SVNVERSION) $(srcdir)`\" -o $@ $(srcdir)/Modules/getbuildinfo.c + $(CC) -c $(PY_CFLAGS) -DSVNVERSION=\"`LANG=C $(SVNVERSION)`\" -o $@ $(srcdir)/Modules/getbuildinfo.c Modules/getpath.o: $(srcdir)/Modules/getpath.c Makefile $(CC) -c $(PY_CFLAGS) -DPYTHONPATH='"$(PYTHONPATH)"' \ Modified: python/trunk/Modules/getbuildinfo.c ============================================================================== --- python/trunk/Modules/getbuildinfo.c (original) +++ python/trunk/Modules/getbuildinfo.c Thu Jan 5 11:33:59 2006 @@ -28,15 +28,17 @@ { static char buildinfo[50]; #ifdef SVNVERSION - static char svnversion[] = SVNVERSION; + static char svnversion[50] = SVNVERSION; #else - static char svnversion[20] = "unknown"; - if (strstr(headurl, "/tags/") != NULL) { - int start = ; + static char svnversion[50] = "exported"; 
+#endif + if (strcmp(svnversion, "exported") == 0 && + strstr(headurl, "/tags/") != NULL) { + int start = 11; + int stop = strlen(revision)-2; strncpy(svnversion, revision+start, stop-start); svnversion[stop-start] = '\0'; } -#endif PyOS_snprintf(buildinfo, sizeof(buildinfo), "%s, %.20s, %.9s", svnversion, DATE, TIME); return buildinfo; Modified: python/trunk/configure ============================================================================== --- python/trunk/configure (original) +++ python/trunk/configure Thu Jan 5 11:33:59 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41852 . +# From configure.in Revision: 41923 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.5. # @@ -3576,10 +3576,8 @@ -for ac_prog in svnversion -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 +# Extract the first word of "svnversion", so it can be a program name with args. +set dummy svnversion; ac_word=$2 echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 if test "${ac_cv_prog_SVNVERSION+set}" = set; then @@ -3595,7 +3593,7 @@ test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_SVNVERSION="$ac_prog" + ac_cv_prog_SVNVERSION="found" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -3613,10 +3611,12 @@ echo "${ECHO_T}no" >&6 fi - test -n "$SVNVERSION" && break -done -test -n "$SVNVERSION" || SVNVERSION="echo no svnversion" - +if test $SVNVERSION = found +then + SVNVERSION="svnversion \$(srcdir)" +else + SVNVERSION="echo exported" +fi case $MACHDEP in bsdos*|hp*|HP*) Modified: python/trunk/configure.in ============================================================================== --- python/trunk/configure.in (original) +++ python/trunk/configure.in Thu Jan 5 11:33:59 2006 @@ -619,7 +619,13 @@ AC_CHECK_PROGS(AR, ar aal, ar) AC_SUBST(SVNVERSION) -AC_CHECK_PROGS(SVNVERSION, svnversion, [echo no svnversion]) +AC_CHECK_PROG(SVNVERSION, svnversion, found) +if test $SVNVERSION = found +then + SVNVERSION="svnversion \$(srcdir)" +else + SVNVERSION="echo exported" +fi case $MACHDEP in bsdos*|hp*|HP*) From python-checkins at python.org Thu Jan 5 11:38:54 2006 From: python-checkins at python.org (martin.v.loewis) Date: Thu, 5 Jan 2006 11:38:54 +0100 (CET) Subject: [Python-checkins] r41923 - svn:log Message-ID: <20060105103854.C67B61E4002@bag.python.org> Author: martin.v.loewis Revision: 41923 Property Name: svn:log New Property Value: Stop maintaining the buildno file. Also, stop determining Unicode sizes with PyString_GET_SIZE. From martin at v.loewis.de Thu Jan 5 11:39:45 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Thu, 05 Jan 2006 11:39:45 +0100 Subject: [Python-checkins] - in python/trunk: Makefile.pre.in Modules/getbuildinfo.c Objects/unicodeobject.c configure configure.in In-Reply-To: <43BCF58C.2040307@egenix.com> References: <20060105100044.207141E4017@bag.python.org> <43BCF58C.2040307@egenix.com> Message-ID: <43BCF771.8090702@v.loewis.de> M.-A. 
Lemburg wrote: > This checkin does not seem to have anything to do with > the buildno... > > Nevertheless, it's still fixing a potential bug :-) > Looks like a copy&paste error. Oops, right, that was a change that was still sitting in my sandbox. I adjusted the commit message. Regards, Martin From skip at pobox.com Thu Jan 5 11:47:37 2006 From: skip at pobox.com (skip at pobox.com) Date: Thu, 5 Jan 2006 04:47:37 -0600 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> <200601050238.51766.anthony@interlink.com.au> <1136406410.10342.36.camel@geddy.wooz.org> <43BC44CE.8070000@acm.org> Message-ID: <17340.63817.672805.755128@montanaro.dyndns.org> Neal> I did something like this. It works on Solaris 10, but Skip's OSX Neal> box is still not happy. I think that's a config issue on my Mac. I installed the Metissian bundle of svn which installs svn* in /usr/local/bin. That's not on the PATH that buildbot gets when executed via cron's @reboot. I just modified that and rebooted the system. Things are looking better. It's in the midst of a regrtest run at the moment.
Skip From python-checkins at python.org Thu Jan 5 11:49:13 2006 From: python-checkins at python.org (skip.montanaro) Date: Thu, 5 Jan 2006 11:49:13 +0100 (CET) Subject: [Python-checkins] r41925 - python/trunk/Modules/getpath.c Message-ID: <20060105104913.CFBA21E4002@bag.python.org> Author: skip.montanaro Date: Thu Jan 5 11:49:13 2006 New Revision: 41925 Modified: python/trunk/Modules/getpath.c Log: squash compiler warning on Mac OSX 10.3 Modified: python/trunk/Modules/getpath.c ============================================================================== --- python/trunk/Modules/getpath.c (original) +++ python/trunk/Modules/getpath.c Thu Jan 5 11:49:13 2006 @@ -381,7 +381,11 @@ NSModule pythonModule; #endif #ifdef __APPLE__ +#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4 uint32_t nsexeclength = MAXPATHLEN; +#else + unsigned long nsexeclength = MAXPATHLEN; +#endif #endif /* If there is no slash in the argv0 path, then we have to From python-checkins at python.org Thu Jan 5 11:51:20 2006 From: python-checkins at python.org (skip.montanaro) Date: Thu, 5 Jan 2006 11:51:20 +0100 (CET) Subject: [Python-checkins] r41926 - python/branches/release24-maint/Modules/getpath.c Message-ID: <20060105105120.53E151E4002@bag.python.org> Author: skip.montanaro Date: Thu Jan 5 11:51:18 2006 New Revision: 41926 Modified: python/branches/release24-maint/Modules/getpath.c Log: backport: squash compiler warning on Mac OSX 10.3 Modified: python/branches/release24-maint/Modules/getpath.c ============================================================================== --- python/branches/release24-maint/Modules/getpath.c (original) +++ python/branches/release24-maint/Modules/getpath.c Thu Jan 5 11:51:18 2006 @@ -381,8 +381,12 @@ NSModule pythonModule; #endif #ifdef __APPLE__ +#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4 + uint32_t nsexeclength = MAXPATHLEN; +#else unsigned long nsexeclength = MAXPATHLEN; #endif +#endif /* If there is no slash in the argv0 path, then we 
have to * assume python is on the user's $PATH, since there's no From skip at pobox.com Thu Jan 5 11:54:59 2006 From: skip at pobox.com (skip at pobox.com) Date: Thu, 5 Jan 2006 04:54:59 -0600 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <17340.63817.672805.755128@montanaro.dyndns.org> References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> <200601050238.51766.anthony@interlink.com.au> <1136406410.10342.36.camel@geddy.wooz.org> <43BC44CE.8070000@acm.org> <17340.63817.672805.755128@montanaro.dyndns.org> Message-ID: <17340.64259.356277.742037@montanaro.dyndns.org> me> I just modified that and rebooted the system. Things are looking me> better. It's in the midst of a regrtest run at the moment. Except now it's failing test_curses. I thought I saw a checkin that would cause that test to be skipped if not run on a tty. Skip From nnorwitz at gmail.com Thu Jan 5 18:54:56 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Thu, 5 Jan 2006 09:54:56 -0800 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <17340.64259.356277.742037@montanaro.dyndns.org> References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> <200601050238.51766.anthony@interlink.com.au> <1136406410.10342.36.camel@geddy.wooz.org> <43BC44CE.8070000@acm.org> <17340.63817.672805.755128@montanaro.dyndns.org> <17340.64259.356277.742037@montanaro.dyndns.org> Message-ID: On 1/5/06, skip at pobox.com wrote: > > me> I just modified that and rebooted the system. Things are looking > me> better. It's in the midst of a regrtest run at the moment. Martin fixed that. Thanks Martin! > Except now it's failing test_curses. I thought I saw a checkin that would > cause that test to be skipped if not run on a tty. Yes, I think I botched it though. I used stdin instead of stdout. Also, I'm not sure if it will work when run from buildbot since PTYs are used. 
I'm not sure if that affects this or not. I'm going to try to fix that tonight. n From trentm at ActiveState.com Thu Jan 5 20:14:13 2006 From: trentm at ActiveState.com (Trent Mick) Date: Thu, 5 Jan 2006 11:14:13 -0800 Subject: [Python-checkins] r41919 - python/trunk/Makefile.pre.in In-Reply-To: <43BCE574.1000705@v.loewis.de> References: <20060105063817.7A55C1E400A@bag.python.org> <43BCE574.1000705@v.loewis.de> Message-ID: <20060105191413.GA7896@activestate.com> [Martin v. Loewis wrote] > neal.norwitz wrote: > > Try to make svnversion test more portable, based on Sjoerd's suggestion > > That apparently doesn't work: the OSX buildbot does not have svnversion > in its path (yet), still "which" succeeds. I use this bash function to use which on Linux, Solaris, OSX and other Un*x: function is_on_path() { # Return 0 if the command is found on the PATH, non-zero otherwise. # GNU which behaves this way, some (notably on Mac OS X) don't. result="`\which $1`" if test -e "$result"; then #echo is_on_path\($1\) returning 0 return 0 else #echo is_on_path\($1\) returning 1 return 1 fi } the '\' before 'which' is to NOT use aliases. I've found that sometimes which was aliased to "type -p" or something and caused me problems... though I don't remember the exact details. 
Trent -- Trent Mick trentm at activestate.com From martin at v.loewis.de Thu Jan 5 20:15:06 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Thu, 05 Jan 2006 20:15:06 +0100 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <17340.64259.356277.742037@montanaro.dyndns.org> References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> <200601050238.51766.anthony@interlink.com.au> <1136406410.10342.36.camel@geddy.wooz.org> <43BC44CE.8070000@acm.org> <17340.63817.672805.755128@montanaro.dyndns.org> <17340.64259.356277.742037@montanaro.dyndns.org> Message-ID: <43BD703A.8040104@v.loewis.de> skip at pobox.com wrote: > me> I just modified that and rebooted the system. Things are looking > me> better. It's in the midst of a regrtest run at the moment. > > Except now it's failing test_curses. I thought I saw a checkin that would > cause that test to be skipped if not run on a tty. If you look carefully, you find it's worse: not only the test is skipped, but the interpreter exits. This should never ever happen. Not sure where the "Error opening terminal" message comes from - whoever produces this probably also causes process abortion. As I can't find the message in the Python sources, I suspect an OSX bug.
Regards, Martin From python-checkins at python.org Fri Jan 6 00:14:27 2006 From: python-checkins at python.org (phillip.eby) Date: Fri, 6 Jan 2006 00:14:27 +0100 (CET) Subject: [Python-checkins] r41927 - in sandbox/trunk/setuptools: setuptools/__init__.py setuptools/command/build_ext.py setuptools/dist.py setuptools/extension.py tests tests/shlib_test tests/shlib_test/hello.c tests/shlib_test/hello.pyx tests/shlib_test/hellolib.c tests/shlib_test/setup.py tests/shlib_test/test_hello.py Message-ID: <20060105231427.222B81E4002@bag.python.org> Author: phillip.eby Date: Fri Jan 6 00:14:21 2006 New Revision: 41927 Added: sandbox/trunk/setuptools/tests/ sandbox/trunk/setuptools/tests/shlib_test/ sandbox/trunk/setuptools/tests/shlib_test/hello.c (contents, props changed) sandbox/trunk/setuptools/tests/shlib_test/hello.pyx (contents, props changed) sandbox/trunk/setuptools/tests/shlib_test/hellolib.c (contents, props changed) sandbox/trunk/setuptools/tests/shlib_test/setup.py (contents, props changed) sandbox/trunk/setuptools/tests/shlib_test/test_hello.py (contents, props changed) Modified: sandbox/trunk/setuptools/setuptools/__init__.py sandbox/trunk/setuptools/setuptools/command/build_ext.py sandbox/trunk/setuptools/setuptools/dist.py sandbox/trunk/setuptools/setuptools/extension.py Log: First draft of shared library build support. See tests/shlib_test for a trivial example. This has only been tested on Windows with a MinGW compiler, and the Mac OS support isn't finished. Testing w/other platforms+compilers would be helpful. 
Modified: sandbox/trunk/setuptools/setuptools/__init__.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/__init__.py (original) +++ sandbox/trunk/setuptools/setuptools/__init__.py Fri Jan 6 00:14:21 2006 @@ -1,8 +1,7 @@ - """Extensions to the 'distutils' for large or complex distributions""" +from setuptools.extension import Extension, SharedLibrary from setuptools.dist import Distribution, Feature, _get_unpatched import distutils.core, setuptools.command -from setuptools.extension import Extension from setuptools.depends import Require from distutils.core import Command as _Command from distutils.util import convert_path Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Fri Jan 6 00:14:21 2006 @@ -7,9 +7,11 @@ import os, sys from distutils.file_util import copy_file +from setuptools.extension import SharedLibrary +from distutils.ccompiler import new_compiler +from distutils.sysconfig import customize_compiler -class build_ext(_build_ext): - +class build_ext(_build_ext): def run(self): """Build extensions in build directory, then copy if --inplace""" old_inplace, self.inplace = self.inplace, 0 @@ -21,15 +23,13 @@ def copy_extensions_to_source(self): build_py = self.get_finalized_command('build_py') for ext in self.extensions or (): - fullname = ext.name + fullname = self.get_ext_fullname(ext.name) + filename = self.get_ext_filename(fullname) modpath = fullname.split('.') package = '.'.join(modpath[:-1]) - base = modpath[-1] package_dir = build_py.get_package_dir(package) - dest_filename = os.path.join(package_dir, - self.get_ext_filename(base)) - src_filename = os.path.join(self.build_lib, - self.get_ext_filename(fullname)) + dest_filename = 
os.path.join(package_dir,os.path.basename(filename)) + src_filename = os.path.join(self.build_lib,filename) # Always copy, even if source is older than destination, to ensure # that the right extensions for the current Python/platform are @@ -47,6 +47,88 @@ # Then do any actual SWIG stuff on the remainder return _du_build_ext.swig_sources(self, sources, *otherargs) + def get_ext_filename(self, fullname): + filename = _build_ext.get_ext_filename(self,fullname) + for ext in self.shlibs: + if self.get_ext_fullname(ext.name)==fullname: + fn, ext = os.path.splitext(filename) + fn = self.shlib_compiler.library_filename(fn,'shared') + print "shlib",fn + return fn + return filename + + def initialize_options(self): + _build_ext.initialize_options(self) + self.shlib_compiler = None + self.shlibs = [] + + def finalize_options(self): + _build_ext.finalize_options(self) + self.shlibs = [ext for ext in self.extensions or () + if isinstance(ext,SharedLibrary)] + if self.shlibs: + self.setup_shlib_compiler() + self.library_dirs.append(self.build_lib) + + def build_extension(self, ext): + _compiler = self.compiler + try: + if isinstance(ext,SharedLibrary): + self.compiler = self.shlib_compiler + _build_ext.build_extension(self,ext) + finally: + self.compiler = _compiler + + + def setup_shlib_compiler(self): + compiler = self.shlib_compiler = new_compiler( + compiler=self.compiler, dry_run=self.dry_run, force=self.force + ) + customize_compiler(compiler) + if sys.platform == "darwin": + # XXX need to fix up compiler_so:ccshared + linker_so:ldshared too + compiler.shared_lib_extension = ".dylib" + + if self.include_dirs is not None: + compiler.set_include_dirs(self.include_dirs) + if self.define is not None: + # 'define' option is a list of (name,value) tuples + for (name,value) in self.define: + compiler.define_macro(name, value) + if self.undef is not None: + for macro in self.undef: + compiler.undefine_macro(macro) + if self.libraries is not None: + 
compiler.set_libraries(self.libraries) + if self.library_dirs is not None: + compiler.set_library_dirs(self.library_dirs) + if self.rpath is not None: + compiler.set_runtime_library_dirs(self.rpath) + if self.link_objects is not None: + compiler.set_link_objects(self.link_objects) + + # hack so distutils' build_extension() builds a shared lib instead + # + def link_shared_object(self, objects, output_libname, output_dir=None, + libraries=None, library_dirs=None, runtime_library_dirs=None, + export_symbols=None, debug=0, extra_preargs=None, + extra_postargs=None, build_temp=None, target_lang=None + ): self.link( + self.SHARED_LIBRARY, objects, output_libname, + output_dir, libraries, library_dirs, runtime_library_dirs, + export_symbols, debug, extra_preargs, extra_postargs, + build_temp, target_lang + ) + compiler.link_shared_object = link_shared_object.__get__(compiler) + + def get_export_symbols(self, ext): + if isinstance(ext,SharedLibrary): + return ext.export_symbols + return _build_ext.get_export_symbols(self,ext) + + + + Modified: sandbox/trunk/setuptools/setuptools/dist.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/dist.py (original) +++ sandbox/trunk/setuptools/setuptools/dist.py Fri Jan 6 00:14:21 2006 @@ -1,9 +1,7 @@ __all__ = ['Distribution', 'Feature'] from distutils.core import Distribution as _Distribution -from distutils.core import Extension from setuptools.depends import Require -from setuptools.command.build_ext import build_ext from setuptools.command.install import install from setuptools.command.sdist import sdist from setuptools.command.install_lib import install_lib @@ -39,6 +37,8 @@ + + def assert_string_list(dist, attr, value): """Verify that value is a string list or None""" try: Modified: sandbox/trunk/setuptools/setuptools/extension.py ============================================================================== --- 
sandbox/trunk/setuptools/setuptools/extension.py (original) +++ sandbox/trunk/setuptools/setuptools/extension.py Fri Jan 6 00:14:21 2006 @@ -1,19 +1,20 @@ from distutils.core import Extension as _Extension +from dist import _get_unpatched +_Extension = _get_unpatched(_Extension) try: from Pyrex.Distutils.build_ext import build_ext - except ImportError: + have_pyrex = False +else: + have_pyrex = True - # Pyrex isn't around, so fix up the sources - - from dist import _get_unpatched - _Extension = _get_unpatched(_Extension) - - class Extension(_Extension): - """Extension that uses '.c' files in place of '.pyx' files""" +class Extension(_Extension): + """Extension that uses '.c' files in place of '.pyx' files""" + if not have_pyrex: + # convert .pyx extensions to .c def __init__(self,*args,**kw): _Extension.__init__(self,*args,**kw) sources = [] @@ -24,14 +25,12 @@ sources.append(s) self.sources = sources - import sys, distutils.core, distutils.extension - distutils.core.Extension = Extension - distutils.extension.Extension = Extension - if 'distutils.command.build_ext' in sys.modules: - sys.modules['distutils.command.build_ext'].Extension = Extension - -else: +class SharedLibrary(Extension): + """Just like a regular Extension, but built as a shared library instead""" - # Pyrex is here, just use regular extension type - Extension = _Extension +import sys, distutils.core, distutils.extension +distutils.core.Extension = Extension +distutils.extension.Extension = Extension +if 'distutils.command.build_ext' in sys.modules: + sys.modules['distutils.command.build_ext'].Extension = Extension Added: sandbox/trunk/setuptools/tests/shlib_test/hello.c ============================================================================== --- (empty file) +++ sandbox/trunk/setuptools/tests/shlib_test/hello.c Fri Jan 6 00:14:21 2006 @@ -0,0 +1,168 @@ +/* Generated by Pyrex 0.9.3 on Thu Jan 05 17:47:12 2006 */ + +#include "Python.h" +#include "structmember.h" +#ifndef PY_LONG_LONG + #define 
PY_LONG_LONG LONG_LONG +#endif + + +typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/ +typedef struct {PyObject **p; char *s; long n;} __Pyx_StringTabEntry; /*proto*/ +static PyObject *__Pyx_UnpackItem(PyObject *, int); /*proto*/ +static int __Pyx_EndUnpack(PyObject *, int); /*proto*/ +static int __Pyx_PrintItem(PyObject *); /*proto*/ +static int __Pyx_PrintNewline(void); /*proto*/ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb); /*proto*/ +static void __Pyx_ReRaise(void); /*proto*/ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list); /*proto*/ +static PyObject *__Pyx_GetExcValue(void); /*proto*/ +static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name); /*proto*/ +static int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/ +static int __Pyx_GetStarArgs(PyObject **args, PyObject **kwds, char *kwd_list[], int nargs, PyObject **args2, PyObject **kwds2); /*proto*/ +static void __Pyx_WriteUnraisable(char *name); /*proto*/ +static void __Pyx_AddTraceback(char *funcname); /*proto*/ +static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, long size); /*proto*/ +static int __Pyx_SetVtable(PyObject *dict, void *vtable); /*proto*/ +static int __Pyx_GetVtable(PyObject *dict, void *vtabptr); /*proto*/ +static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name, char *modname); /*proto*/ +static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ +static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/ + +static PyObject *__pyx_m; +static PyObject *__pyx_b; +static int __pyx_lineno; +static char *__pyx_filename; +staticforward char **__pyx_f; + +/* Declarations from hello */ + +char (*(get_hello_msg(void))); /*proto*/ + +/* Implementation of hello */ + +static PyObject *__pyx_n_hello; + +static PyObject *__pyx_f_5hello_hello(PyObject 
*__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyObject *__pyx_f_5hello_hello(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_r; + PyObject *__pyx_1 = 0; + static char *__pyx_argnames[] = {0}; + if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "", __pyx_argnames)) return 0; + + /* "C:\cygwin\home\pje\setuptools\tests\shlib_test\hello.pyx":4 */ + __pyx_1 = PyString_FromString(get_hello_msg()); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; goto __pyx_L1;} + __pyx_r = __pyx_1; + __pyx_1 = 0; + goto __pyx_L0; + + __pyx_r = Py_None; Py_INCREF(__pyx_r); + goto __pyx_L0; + __pyx_L1:; + Py_XDECREF(__pyx_1); + __Pyx_AddTraceback("hello.hello"); + __pyx_r = 0; + __pyx_L0:; + return __pyx_r; +} + +static __Pyx_InternTabEntry __pyx_intern_tab[] = { + {&__pyx_n_hello, "hello"}, + {0, 0} +}; + +static struct PyMethodDef __pyx_methods[] = { + {"hello", (PyCFunction)__pyx_f_5hello_hello, METH_VARARGS|METH_KEYWORDS, 0}, + {0, 0, 0, 0} +}; + +DL_EXPORT(void) inithello(void); /*proto*/ +DL_EXPORT(void) inithello(void) { + __pyx_m = Py_InitModule4("hello", __pyx_methods, 0, 0, PYTHON_API_VERSION); + if (!__pyx_m) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; goto __pyx_L1;}; + __pyx_b = PyImport_AddModule("__builtin__"); + if (!__pyx_b) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; goto __pyx_L1;}; + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; goto __pyx_L1;}; + if (__Pyx_InternStrings(__pyx_intern_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; goto __pyx_L1;}; + + /* "C:\cygwin\home\pje\setuptools\tests\shlib_test\hello.pyx":3 */ + return; + __pyx_L1:; + __Pyx_AddTraceback("hello"); +} + +static char *__pyx_filenames[] = { + "hello.pyx", +}; +statichere char **__pyx_f = __pyx_filenames; + +/* Runtime support code */ + +static int __Pyx_InternStrings(__Pyx_InternTabEntry *t) { + while (t->p) { + *t->p = 
PyString_InternFromString(t->s); + if (!*t->p) + return -1; + ++t; + } + return 0; +} + +#include "compile.h" +#include "frameobject.h" +#include "traceback.h" + +static void __Pyx_AddTraceback(char *funcname) { + PyObject *py_srcfile = 0; + PyObject *py_funcname = 0; + PyObject *py_globals = 0; + PyObject *empty_tuple = 0; + PyObject *empty_string = 0; + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + + py_srcfile = PyString_FromString(__pyx_filename); + if (!py_srcfile) goto bad; + py_funcname = PyString_FromString(funcname); + if (!py_funcname) goto bad; + py_globals = PyModule_GetDict(__pyx_m); + if (!py_globals) goto bad; + empty_tuple = PyTuple_New(0); + if (!empty_tuple) goto bad; + empty_string = PyString_FromString(""); + if (!empty_string) goto bad; + py_code = PyCode_New( + 0, /*int argcount,*/ + 0, /*int nlocals,*/ + 0, /*int stacksize,*/ + 0, /*int flags,*/ + empty_string, /*PyObject *code,*/ + empty_tuple, /*PyObject *consts,*/ + empty_tuple, /*PyObject *names,*/ + empty_tuple, /*PyObject *varnames,*/ + empty_tuple, /*PyObject *freevars,*/ + empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + __pyx_lineno, /*int firstlineno,*/ + empty_string /*PyObject *lnotab*/ + ); + if (!py_code) goto bad; + py_frame = PyFrame_New( + PyThreadState_Get(), /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + py_globals, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + py_frame->f_lineno = __pyx_lineno; + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_srcfile); + Py_XDECREF(py_funcname); + Py_XDECREF(empty_tuple); + Py_XDECREF(empty_string); + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} Added: sandbox/trunk/setuptools/tests/shlib_test/hello.pyx ============================================================================== --- (empty file) +++ sandbox/trunk/setuptools/tests/shlib_test/hello.pyx Fri Jan 6 00:14:21 2006 @@ -0,0 +1,4 @@ +cdef extern 
char *get_hello_msg() + +def hello(): + return get_hello_msg() Added: sandbox/trunk/setuptools/tests/shlib_test/hellolib.c ============================================================================== --- (empty file) +++ sandbox/trunk/setuptools/tests/shlib_test/hellolib.c Fri Jan 6 00:14:21 2006 @@ -0,0 +1,3 @@ +extern char* get_hello_msg() { + return "Hello, world!"; +} Added: sandbox/trunk/setuptools/tests/shlib_test/setup.py ============================================================================== --- (empty file) +++ sandbox/trunk/setuptools/tests/shlib_test/setup.py Fri Jan 6 00:14:21 2006 @@ -0,0 +1,10 @@ +from setuptools import setup, Extension, SharedLibrary + +setup( + name="shlib_test", + ext_modules = [ + SharedLibrary("hellolib", ["hellolib.c"]), + Extension("hello", ["hello.pyx"], libraries=["hellolib"]) + ], + test_suite="test_hello.HelloWorldTest", +) Added: sandbox/trunk/setuptools/tests/shlib_test/test_hello.py ============================================================================== --- (empty file) +++ sandbox/trunk/setuptools/tests/shlib_test/test_hello.py Fri Jan 6 00:14:21 2006 @@ -0,0 +1,7 @@ +from unittest import TestCase + +class HelloWorldTest(TestCase): + def testHelloMsg(self): + from hello import hello + self.assertEqual(hello(), "Hello, world!") + From python-checkins at python.org Fri Jan 6 00:30:43 2006 From: python-checkins at python.org (phillip.eby) Date: Fri, 6 Jan 2006 00:30:43 +0100 (CET) Subject: [Python-checkins] r41928 - sandbox/trunk/setuptools/setuptools/command/build_ext.py Message-ID: <20060105233043.513DD1E4002@bag.python.org> Author: phillip.eby Date: Fri Jan 6 00:30:42 2006 New Revision: 41928 Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py Log: Remove debug print left in by mistake. 
Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Fri Jan 6 00:30:42 2006 @@ -52,9 +52,7 @@ for ext in self.shlibs: if self.get_ext_fullname(ext.name)==fullname: fn, ext = os.path.splitext(filename) - fn = self.shlib_compiler.library_filename(fn,'shared') - print "shlib",fn - return fn + return self.shlib_compiler.library_filename(fn,'shared') return filename def initialize_options(self): @@ -80,6 +78,8 @@ self.compiler = _compiler + + def setup_shlib_compiler(self): compiler = self.shlib_compiler = new_compiler( compiler=self.compiler, dry_run=self.dry_run, force=self.force From skip at pobox.com Fri Jan 6 00:37:04 2006 From: skip at pobox.com (skip at pobox.com) Date: Thu, 5 Jan 2006 17:37:04 -0600 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <43BD703A.8040104@v.loewis.de> References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> <200601050238.51766.anthony@interlink.com.au> <1136406410.10342.36.camel@geddy.wooz.org> <43BC44CE.8070000@acm.org> <17340.63817.672805.755128@montanaro.dyndns.org> <17340.64259.356277.742037@montanaro.dyndns.org> <43BD703A.8040104@v.loewis.de> Message-ID: <17341.44448.703814.347180@montanaro.dyndns.org> Martin> Not sure where the "Error opening terminal" message comes from - Martin> whoever produces this probably also cause process abortion. On my Mac that's present in /usr/lib/libncurses.5.dylib. That tells me the tty test isn't doing what it's supposed to do. Apparently execution is still getting to the "import curses" line. 
Skip From python-checkins at python.org Fri Jan 6 00:38:57 2006 From: python-checkins at python.org (martin.v.loewis) Date: Fri, 6 Jan 2006 00:38:57 +0100 (CET) Subject: [Python-checkins] r41929 - in python/trunk: Doc/lib/libsys.tex Include/patchlevel.h Include/pythonrun.h Modules/getbuildinfo.c Python/sysmodule.c Message-ID: <20060105233857.4094A1E4002@bag.python.org> Author: martin.v.loewis Date: Fri Jan 6 00:38:54 2006 New Revision: 41929 Modified: python/trunk/Doc/lib/libsys.tex python/trunk/Include/patchlevel.h python/trunk/Include/pythonrun.h python/trunk/Modules/getbuildinfo.c python/trunk/Python/sysmodule.c Log: Drop sys.build_number. Add sys.subversion. Modified: python/trunk/Doc/lib/libsys.tex ============================================================================== --- python/trunk/Doc/lib/libsys.tex (original) +++ python/trunk/Doc/lib/libsys.tex Fri Jan 6 00:38:54 2006 @@ -27,10 +27,19 @@ \versionadded{2.0} \end{datadesc} -\begin{datadesc}{build_number} - A string representing the Subversion revision that this Python executable - was built from. This number is a string because it may contain a trailing - 'M' if Python was built from a mixed revision source tree. +\begin{datadesc}{subversion} + A triple (repo, branch, version) representing the Subversion + information of the Python interpreter. + \var{repo} is the name of the repository, \code{'CPython'}. + \var{branch} is the a string of one of the forms \code{'trunk'}, + \code{'branches/name'} or \code{'tags/name'}. + \var{version} is the output of \code{svnversion}, if the + interpreter was built from a Subversion checkout; it contains + the revision number (range) and possibly a trailing 'M' if + there were local modifications. If the tree was exported + (or svnversion was not available), it is the revision of + \code{Include/patchlevel.h} if the branch is a tag. Otherwise, + it is \code{None}. 
\versionadded{2.5} \end{datadesc} Modified: python/trunk/Include/patchlevel.h ============================================================================== --- python/trunk/Include/patchlevel.h (original) +++ python/trunk/Include/patchlevel.h Fri Jan 6 00:38:54 2006 @@ -28,6 +28,9 @@ /* Version as a string */ #define PY_VERSION "2.5a0" +/* Subversion Revision number of this file (not of the repository) */ +#define PY_PATCHLEVEL_REVISION "$Revision$" + /* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2. Use this for numeric comparisons, e.g. #if PY_VERSION_HEX >= ... */ #define PY_VERSION_HEX ((PY_MAJOR_VERSION << 24) | \ Modified: python/trunk/Include/pythonrun.h ============================================================================== --- python/trunk/Include/pythonrun.h (original) +++ python/trunk/Include/pythonrun.h Fri Jan 6 00:38:54 2006 @@ -108,7 +108,9 @@ PyAPI_FUNC(const char *) Py_GetCopyright(void); PyAPI_FUNC(const char *) Py_GetCompiler(void); PyAPI_FUNC(const char *) Py_GetBuildInfo(void); -PyAPI_FUNC(const char *) Py_GetBuildNumber(void); +PyAPI_FUNC(const char *) _Py_svnversion(void); +PyAPI_FUNC(const char *) Py_SubversionRevision(void); +PyAPI_FUNC(const char *) Py_SubversionShortBranch(void); /* Internal -- various one-time initializations */ PyAPI_FUNC(PyObject *) _PyBuiltin_Init(void); Modified: python/trunk/Modules/getbuildinfo.c ============================================================================== --- python/trunk/Modules/getbuildinfo.c (original) +++ python/trunk/Modules/getbuildinfo.c Fri Jan 6 00:38:54 2006 @@ -20,32 +20,25 @@ #endif #endif -static const char revision[] = "$Revision$"; -static const char headurl[] = "$HeadURL$"; - const char * Py_GetBuildInfo(void) { static char buildinfo[50]; -#ifdef SVNVERSION - static char svnversion[50] = SVNVERSION; -#else - static char svnversion[50] = "exported"; -#endif - if (strcmp(svnversion, "exported") == 0 && - strstr(headurl, "/tags/") != NULL) { - int start = 
11; - int stop = strlen(revision)-2; - strncpy(svnversion, revision+start, stop-start); - svnversion[stop-start] = '\0'; - } + char *revision = Py_SubversionRevision(); + char *sep = revision ? ":" : ""; + char *branch = Py_SubversionShortBranch(); PyOS_snprintf(buildinfo, sizeof(buildinfo), - "%s, %.20s, %.9s", svnversion, DATE, TIME); + "%s%s%s, %.20s, %.9s", branch, sep, revision, + DATE, TIME); return buildinfo; } const char * -Py_GetBuildNumber(void) +_Py_svnversion(void) { - return "0"; +#ifdef SVNVERSION + return SVNVERSION; +#else + return "exported"; +#endif } Modified: python/trunk/Python/sysmodule.c ============================================================================== --- python/trunk/Python/sysmodule.c (original) +++ python/trunk/Python/sysmodule.c Fri Jan 6 00:38:54 2006 @@ -934,6 +934,87 @@ return fflush (stream) || prev_fail ? EOF : 0; } +/* Subversion branch and revision management */ +static const char _patchlevel_revision[] = PY_PATCHLEVEL_REVISION; +static const char headurl[] = "$HeadURL$"; +static int svn_initialized; +static char patchlevel_revision[50]; /* Just the number */ +static char branch[50]; +static char shortbranch[50]; +static const char *svn_revision; + +static void svnversion_init(void) +{ + const char *python, *br_start, *br_end, *br_end2, *svnversion; + int len, istag; + + if (svn_initialized) + return; + + python = strstr(headurl, "/python/"); + if (!python) + Py_FatalError("subversion keywords missing"); + + br_start = python + 8; + br_end = strchr(br_start, '/'); + /* Works even for trunk, + as we are in trunk/Python/sysmodule.c */ + br_end2 = strchr(br_end+1, '/'); + + istag = strncmp(br_start, "tags", 4) == 0; + if (strncmp(br_start, "trunk", 5) == 0) { + strcpy(branch, "trunk"); + strcpy(shortbranch, "trunk"); + + } + else if (istag || strncmp(br_start, "branches", 8) == 0) { + len = br_end2 - br_start; + strncpy(branch, br_start, len); + branch[len] = '\0'; + + len = br_end2 - (br_end + 1); + strncpy(shortbranch, 
br_end + 1, len); + shortbranch[len] = '\0'; + } + else { + Py_FatalError("bad HeadURL"); + return; + } + + + svnversion = _Py_svnversion(); + if (strcmp(svnversion, "exported") != 0) + svn_revision = svnversion; + else if (istag) { + len = strlen(_patchlevel_revision); + strncpy(patchlevel_revision, _patchlevel_revision + 11, + len - 13); + patchlevel_revision[len - 13] = '\0'; + svn_revision = patchlevel_revision; + } + else + svn_revision = ""; + + svn_initialized = 1; +} + +/* Return svnversion output if available. + Else return Revision of patchlevel.h if on branch. + Else return empty string */ +const char* +Py_SubversionRevision() +{ + svnversion_init(); + return svn_revision; +} + +const char* +Py_SubversionShortBranch() +{ + svnversion_init(); + return shortbranch; +} + PyObject * _PySys_Init(void) { @@ -1003,8 +1084,9 @@ PyDict_SetItemString(sysdict, "hexversion", v = PyInt_FromLong(PY_VERSION_HEX)); Py_XDECREF(v); - PyDict_SetItemString(sysdict, "build_number", - v = PyString_FromString(Py_GetBuildNumber())); + svnversion_init(); + v = Py_BuildValue("(ssz)", "CPython", branch, svn_revision); + PyDict_SetItemString(sysdict, "subversion", v); Py_XDECREF(v); /* * These release level checks are mutually exclusive and cover From python-checkins at python.org Fri Jan 6 00:40:03 2006 From: python-checkins at python.org (martin.v.loewis) Date: Fri, 6 Jan 2006 00:40:03 +0100 (CET) Subject: [Python-checkins] r41930 - python/trunk/Python/sysmodule.c Message-ID: <20060105234003.712A61E4002@bag.python.org> Author: martin.v.loewis Date: Fri Jan 6 00:40:01 2006 New Revision: 41930 Modified: python/trunk/Python/sysmodule.c (props changed) Log: Add HeadURL to the keywords. 
From barry at python.org Fri Jan 6 00:49:18 2006 From: barry at python.org (Barry Warsaw) Date: Thu, 05 Jan 2006 18:49:18 -0500 Subject: [Python-checkins] r41929 - in python/trunk: Doc/lib/libsys.tex Include/patchlevel.h Include/pythonrun.h Modules/getbuildinfo.c Python/sysmodule.c In-Reply-To: <20060105233857.4094A1E4002@bag.python.org> References: <20060105233857.4094A1E4002@bag.python.org> Message-ID: <1136504958.15440.78.camel@geddy.wooz.org> On Fri, 2006-01-06 at 00:38 +0100, martin.v.loewis wrote: > Author: martin.v.loewis > Date: Fri Jan 6 00:38:54 2006 > New Revision: 41929 > > Modified: > python/trunk/Doc/lib/libsys.tex > python/trunk/Include/patchlevel.h > python/trunk/Include/pythonrun.h > python/trunk/Modules/getbuildinfo.c > python/trunk/Python/sysmodule.c > Log: > Drop sys.build_number. Add sys.subversion. I haven't tried this change yet, but I'd like to quibble about the sys attribute name. OT1H, it's more descriptive, but OTOH I don't like the idea of having such an attribute name tied to the particular flavor of version control system we happen to be using at the moment. Can we make the attribute more generic and still retain the descriptive relevance? E.g. it's "Python-checkins" rather than "Python-CVS" or "Python-SVN". -Barry -------------- next part -------------- A non-text attachment was scrubbed... 
Name: not available Type: application/pgp-signature Size: 307 bytes Desc: This is a digitally signed message part Url : http://mail.python.org/pipermail/python-checkins/attachments/20060105/acd23e95/attachment.pgp From martin at v.loewis.de Fri Jan 6 01:24:53 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Fri, 06 Jan 2006 01:24:53 +0100 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <17341.44448.703814.347180@montanaro.dyndns.org> References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> <200601050238.51766.anthony@interlink.com.au> <1136406410.10342.36.camel@geddy.wooz.org> <43BC44CE.8070000@acm.org> <17340.63817.672805.755128@montanaro.dyndns.org> <17340.64259.356277.742037@montanaro.dyndns.org> <43BD703A.8040104@v.loewis.de> <17341.44448.703814.347180@montanaro.dyndns.org> Message-ID: <43BDB8D5.1050400@v.loewis.de> skip at pobox.com wrote: > Martin> Not sure where the "Error opening terminal" message comes from - > Martin> whoever produces this probably also cause process abortion. > > On my Mac that's present in /usr/lib/libncurses.5.dylib. That tells me the > tty test isn't doing what it's supposed to do. Apparently execution is > still getting to the "import curses" line. Ah. I guess some OSX guru would need to investigate that. It should *never* be possible for an import statement to abort the interpreter without an exception. OTOH, it's hard to see why a plain import could cause this, in the first place. init_curses does not call any curses functions. Thinking about it: more likely, the test that Neal added (os.isatty(sys.stdin.fileno())) *passes*, and it is the call to initscr that crashes. Apparently, the string after this message ("unknown" in your case) is meant to indicate the terminal name (e.g. from the TERM variable).
If I do the following on Linux, I get the same effect: martin at mira:~/work/py2.5$ TERM=unknown ./python Python 2.5a0 (2.4.5, Jan 6 2006, 00:43:47) [GCC 4.0.3 20051201 (prerelease) (Debian 4.0.2-5)] on linux2 Type "help", "copyright", "credits" or "license" for more information. py> import curses py> curses.initscr() Error opening terminal: unknown. So you can easily get ncurses to abort your program if it doesn't know the terminal. So we would rather need an ncurses guru, then, to find out why this happens and how it could be avoided. Regards, Martin From martin at v.loewis.de Fri Jan 6 01:30:20 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Fri, 06 Jan 2006 01:30:20 +0100 Subject: [Python-checkins] r41929 - in python/trunk: Doc/lib/libsys.tex Include/patchlevel.h Include/pythonrun.h Modules/getbuildinfo.c Python/sysmodule.c In-Reply-To: <1136504958.15440.78.camel@geddy.wooz.org> References: <20060105233857.4094A1E4002@bag.python.org> <1136504958.15440.78.camel@geddy.wooz.org> Message-ID: <43BDBA1C.9010307@v.loewis.de> Barry Warsaw wrote: > I haven't tried this change yet, but I'd like to quibble about the sys > attribute name. OT1H, it's more descriptive, but OTOH I don't like the > idea of having such an attribute name tied to the particular flavor of > version control system we happen to be using at the moment. Can we make > the attribute more generic and still retain the descriptive relevance? I saw this coming :-) I think I'm happy with *any* name for it. The interpretation of it will still change with the next VC system. Currently, the "branch" field could be "branches/ssize_t", or "tags/2.5.2". I don't think we should promise that this specific format applies forever. So code relying on it will break even if the name stays the same. Still, the name could be more generic. It would give a warm fuzzy feeling, but not actual technical advantages. Regards, Martin P.S. 
I shortly thought about keeping the lexical order in libsys.tex, but then decided not to, because it would have to be reordered if the name changed again :-) From python-checkins at python.org Fri Jan 6 01:43:43 2006 From: python-checkins at python.org (martin.v.loewis) Date: Fri, 6 Jan 2006 01:43:43 +0100 (CET) Subject: [Python-checkins] r41931 - python/trunk/Modules/getbuildinfo.c Message-ID: <20060106004343.411F51E4007@bag.python.org> Author: martin.v.loewis Date: Fri Jan 6 01:43:42 2006 New Revision: 41931 Modified: python/trunk/Modules/getbuildinfo.c Log: Deal with empty svn_revisions. Modified: python/trunk/Modules/getbuildinfo.c ============================================================================== --- python/trunk/Modules/getbuildinfo.c (original) +++ python/trunk/Modules/getbuildinfo.c Fri Jan 6 01:43:42 2006 @@ -25,7 +25,7 @@ { static char buildinfo[50]; char *revision = Py_SubversionRevision(); - char *sep = revision ? ":" : ""; + char *sep = *revision ? ":" : ""; char *branch = Py_SubversionShortBranch(); PyOS_snprintf(buildinfo, sizeof(buildinfo), "%s%s%s, %.20s, %.9s", branch, sep, revision, From python-checkins at python.org Fri Jan 6 01:44:12 2006 From: python-checkins at python.org (martin.v.loewis) Date: Fri, 6 Jan 2006 01:44:12 +0100 (CET) Subject: [Python-checkins] r41932 - python/trunk/Lib/platform.py Message-ID: <20060106004412.560451E4007@bag.python.org> Author: martin.v.loewis Date: Fri Jan 6 01:44:11 2006 New Revision: 41932 Modified: python/trunk/Lib/platform.py Log: Generalize buildno to be a sequence of non-comma characters. 
Modified: python/trunk/Lib/platform.py ============================================================================== --- python/trunk/Lib/platform.py (original) +++ python/trunk/Lib/platform.py Fri Jan 6 01:44:11 2006 @@ -1092,7 +1092,7 @@ ### Various APIs for extracting information from sys.version _sys_version_parser = re.compile(r'([\w.+]+)\s*' - '\(#?(\d+:?\d+M?),\s*([\w ]+),\s*([\w :]+)\)\s*' + '\(#?([^,]+),\s*([\w ]+),\s*([\w :]+)\)\s*' '\[([^\]]+)\]?') _sys_version_cache = None From martin at v.loewis.de Fri Jan 6 02:26:15 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Fri, 06 Jan 2006 02:26:15 +0100 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <43BDB8D5.1050400@v.loewis.de> References: <20060103143057.4DB8E1E401C@bag.python.org> <43BB9562.2070404@acm.org> <200601050238.51766.anthony@interlink.com.au> <1136406410.10342.36.camel@geddy.wooz.org> <43BC44CE.8070000@acm.org> <17340.63817.672805.755128@montanaro.dyndns.org> <17340.64259.356277.742037@montanaro.dyndns.org> <43BD703A.8040104@v.loewis.de> <17341.44448.703814.347180@montanaro.dyndns.org> <43BDB8D5.1050400@v.loewis.de> Message-ID: <43BDC737.2000508@v.loewis.de> Martin v. Löwis wrote: > So we would rather need an ncurses guru, then, to find out why > this happens and how it could be avoided. I just checked ncurses and POSIX, and both say that initscr writes an error message and exits if the terminal cannot be initialized. It is (according to POSIX) equivalent to (if it succeeds) newterm(getenv("TERM"), stdout, stdin); return stdscr; so applications should use newterm directly if they don't wish curses to terminate them.
Regards, Martin From barry at python.org Fri Jan 6 02:57:34 2006 From: barry at python.org (Barry Warsaw) Date: Thu, 05 Jan 2006 20:57:34 -0500 Subject: [Python-checkins] r41929 - in python/trunk: Doc/lib/libsys.tex Include/patchlevel.h Include/pythonrun.h Modules/getbuildinfo.c Python/sysmodule.c In-Reply-To: <43BDBA1C.9010307@v.loewis.de> References: <20060105233857.4094A1E4002@bag.python.org> <1136504958.15440.78.camel@geddy.wooz.org> <43BDBA1C.9010307@v.loewis.de> Message-ID: <1136512654.15425.83.camel@geddy.wooz.org> On Fri, 2006-01-06 at 01:30 +0100, "Martin v. Löwis" wrote: > I saw this coming :-) I think I'm happy with *any* name for it. The > interpretation of it will still change with the next VC system. > Currently, the "branch" field could be "branches/ssize_t", or > "tags/2.5.2". I don't think we should promise that this specific > format applies forever. So code relying on it will break even > if the name stays the same. > > Still, the name could be more generic. It would give a warm fuzzy > feeling, but not actual technical advantages. > > Regards, > Martin > > P.S. I shortly thought about keeping the lexical order in > libsys.tex, but then decided not to, because it would have > to be reordered if the name changed again :-) :) I think someone suggested in a previous message sys.build_info and that works for me! -Barry -------------- next part -------------- A non-text attachment was scrubbed...
Name: not available Type: application/pgp-signature Size: 307 bytes Desc: This is a digitally signed message part Url : http://mail.python.org/pipermail/python-checkins/attachments/20060105/72f426e9/attachment.pgp From python-checkins at python.org Fri Jan 6 03:40:54 2006 From: python-checkins at python.org (tim.peters) Date: Fri, 6 Jan 2006 03:40:54 +0100 (CET) Subject: [Python-checkins] r41933 - python/trunk/Python/sysmodule.c Message-ID: <20060106024054.0C5441E4002@bag.python.org> Author: tim.peters Date: Fri Jan 6 03:40:53 2006 New Revision: 41933 Modified: python/trunk/Python/sysmodule.c Log: Trimmed trailing whitespace. Modified: python/trunk/Python/sysmodule.c ============================================================================== --- python/trunk/Python/sysmodule.c (original) +++ python/trunk/Python/sysmodule.c Fri Jan 6 03:40:53 2006 @@ -457,16 +457,16 @@ tstate->interp->tscdump = 0; Py_INCREF(Py_None); return Py_None; - + } -PyDoc_STRVAR(settscdump_doc, +PyDoc_STRVAR(settscdump_doc, "settscdump(bool)\n\ \n\ If true, tell the Python interpreter to dump VM measurements to\n\ stderr. If false, turn off dump. The measurements are based on the\n\ processor's time-stamp counter." 
-); +); #endif /* TSC */ static PyObject * @@ -476,8 +476,8 @@ if (!PyArg_ParseTuple(args, "i:setrecursionlimit", &new_limit)) return NULL; if (new_limit <= 0) { - PyErr_SetString(PyExc_ValueError, - "recursion limit must be positive"); + PyErr_SetString(PyExc_ValueError, + "recursion limit must be positive"); return NULL; } Py_SetRecursionLimit(new_limit); @@ -713,7 +713,7 @@ static PyMethodDef sys_methods[] = { /* Might as well keep this in alphabetic order */ - {"callstats", (PyCFunction)PyEval_GetCallStats, METH_NOARGS, + {"callstats", (PyCFunction)PyEval_GetCallStats, METH_NOARGS, callstats_doc}, {"displayhook", sys_displayhook, METH_O, displayhook_doc}, {"exc_info", sys_exc_info, METH_NOARGS, exc_info_doc}, @@ -721,11 +721,11 @@ {"excepthook", sys_excepthook, METH_VARARGS, excepthook_doc}, {"exit", sys_exit, METH_VARARGS, exit_doc}, #ifdef Py_USING_UNICODE - {"getdefaultencoding", (PyCFunction)sys_getdefaultencoding, - METH_NOARGS, getdefaultencoding_doc}, + {"getdefaultencoding", (PyCFunction)sys_getdefaultencoding, + METH_NOARGS, getdefaultencoding_doc}, #endif #ifdef HAVE_DLOPEN - {"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS, + {"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS, getdlopenflags_doc}, #endif #ifdef COUNT_ALLOCS @@ -736,7 +736,7 @@ #endif #ifdef Py_USING_UNICODE {"getfilesystemencoding", (PyCFunction)sys_getfilesystemencoding, - METH_NOARGS, getfilesystemencoding_doc}, + METH_NOARGS, getfilesystemencoding_doc}, #endif #ifdef Py_TRACE_REFS {"getobjects", _Py_GetObjects, METH_VARARGS}, @@ -757,14 +757,14 @@ #endif #ifdef Py_USING_UNICODE {"setdefaultencoding", sys_setdefaultencoding, METH_VARARGS, - setdefaultencoding_doc}, + setdefaultencoding_doc}, #endif {"setcheckinterval", sys_setcheckinterval, METH_VARARGS, - setcheckinterval_doc}, + setcheckinterval_doc}, {"getcheckinterval", sys_getcheckinterval, METH_NOARGS, - getcheckinterval_doc}, + getcheckinterval_doc}, #ifdef HAVE_DLOPEN - {"setdlopenflags", 
sys_setdlopenflags, METH_VARARGS, + {"setdlopenflags", sys_setdlopenflags, METH_VARARGS, setdlopenflags_doc}, #endif {"setprofile", sys_setprofile, METH_O, setprofile_doc}, @@ -957,7 +957,7 @@ br_start = python + 8; br_end = strchr(br_start, '/'); - /* Works even for trunk, + /* Works even for trunk, as we are in trunk/Python/sysmodule.c */ br_end2 = strchr(br_end+1, '/'); @@ -966,7 +966,7 @@ strcpy(branch, "trunk"); strcpy(shortbranch, "trunk"); - } + } else if (istag || strncmp(br_start, "branches", 8) == 0) { len = br_end2 - br_start; strncpy(branch, br_start, len); @@ -975,7 +975,7 @@ len = br_end2 - (br_end + 1); strncpy(shortbranch, br_end + 1, len); shortbranch[len] = '\0'; - } + } else { Py_FatalError("bad HeadURL"); return; @@ -994,7 +994,7 @@ } else svn_revision = ""; - + svn_initialized = 1; } @@ -1168,7 +1168,7 @@ if (warnoptions != NULL) { PyDict_SetItemString(sysdict, "warnoptions", warnoptions); } - + if (PyErr_Occurred()) return NULL; return m; @@ -1180,7 +1180,7 @@ int i, n; char *p; PyObject *v, *w; - + n = 1; p = path; while ((p = strchr(p, delim)) != NULL) { From python-checkins at python.org Fri Jan 6 03:42:46 2006 From: python-checkins at python.org (tim.peters) Date: Fri, 6 Jan 2006 03:42:46 +0100 (CET) Subject: [Python-checkins] r41934 - python/trunk/Python/sysmodule.c Message-ID: <20060106024246.CEE771E4002@bag.python.org> Author: tim.peters Date: Fri Jan 6 03:42:46 2006 New Revision: 41934 Modified: python/trunk/Python/sysmodule.c Log: svnversion_init(): Use standard layout for function defn. 
Modified: python/trunk/Python/sysmodule.c ============================================================================== --- python/trunk/Python/sysmodule.c (original) +++ python/trunk/Python/sysmodule.c Fri Jan 6 03:42:46 2006 @@ -943,7 +943,8 @@ static char shortbranch[50]; static const char *svn_revision; -static void svnversion_init(void) +static void +svnversion_init(void) { const char *python, *br_start, *br_end, *br_end2, *svnversion; int len, istag; From python-checkins at python.org Fri Jan 6 03:45:17 2006 From: python-checkins at python.org (tim.peters) Date: Fri, 6 Jan 2006 03:45:17 +0100 (CET) Subject: [Python-checkins] r41935 - python/trunk/Modules/getbuildinfo.c Message-ID: <20060106024517.CBD131E4002@bag.python.org> Author: tim.peters Date: Fri Jan 6 03:45:17 2006 New Revision: 41935 Modified: python/trunk/Modules/getbuildinfo.c Log: Py_GetBuildInfo(): Squash compiler warnings. Locals `revision` and `branch` were const-incorrect. Modified: python/trunk/Modules/getbuildinfo.c ============================================================================== --- python/trunk/Modules/getbuildinfo.c (original) +++ python/trunk/Modules/getbuildinfo.c Fri Jan 6 03:45:17 2006 @@ -24,9 +24,9 @@ Py_GetBuildInfo(void) { static char buildinfo[50]; - char *revision = Py_SubversionRevision(); - char *sep = *revision ? ":" : ""; - char *branch = Py_SubversionShortBranch(); + const char *revision = Py_SubversionRevision(); + const char *sep = *revision ? 
":" : ""; + const char *branch = Py_SubversionShortBranch(); PyOS_snprintf(buildinfo, sizeof(buildinfo), "%s%s%s, %.20s, %.9s", branch, sep, revision, DATE, TIME); From python-checkins at python.org Fri Jan 6 05:18:26 2006 From: python-checkins at python.org (neal.norwitz) Date: Fri, 6 Jan 2006 05:18:26 +0100 (CET) Subject: [Python-checkins] r41936 - python/trunk/Lib/test/test_curses.py Message-ID: <20060106041826.B36D11E4002@bag.python.org> Author: neal.norwitz Date: Fri Jan 6 05:18:21 2006 New Revision: 41936 Modified: python/trunk/Lib/test/test_curses.py Log: initscr() will exit if there's an error. Try to catch the obvious failure cases if TERM isn't set or is unknown (perhaps we should only check if unset or empty?) Skip the test if TERM isn't set. This seems to occur when running under buildbot and presumably cron. For some more info check here: http://mail.python.org/pipermail/python-checkins/2006-January/048704.html Will backport if it works. Modified: python/trunk/Lib/test/test_curses.py ============================================================================== --- python/trunk/Lib/test/test_curses.py (original) +++ python/trunk/Lib/test/test_curses.py Fri Jan 6 05:18:21 2006 @@ -14,10 +14,13 @@ # 'curses' resource be given on the regrtest command line using the -u # option. If not available, nothing after this line will be executed. 
-from test import test_support -test_support.requires('curses') -if not os.isatty(sys.stdin.fileno()): - raise test_support.TestSkipped, "stdin is not a tty" +from test.test_support import requires, TestSkipped +requires('curses') + +# XXX: if newterm was supported we could use it instead of initscr and not exit +term = os.environ.get('TERM') +if not term or term == 'unknown': + raise TestSkipped, "$TERM=%r, calling initscr() may cause exit" % term def window_funcs(stdscr): "Test the methods of windows" From python-checkins at python.org Fri Jan 6 05:40:21 2006 From: python-checkins at python.org (neal.norwitz) Date: Fri, 6 Jan 2006 05:40:21 +0100 (CET) Subject: [Python-checkins] r41937 - python/branches/release24-maint/Lib/test/test_curses.py Message-ID: <20060106044021.569651E4002@bag.python.org> Author: neal.norwitz Date: Fri Jan 6 05:40:20 2006 New Revision: 41937 Modified: python/branches/release24-maint/Lib/test/test_curses.py Log: Backport: initscr() will exit if there's an error. Try to catch the obvious failure cases if TERM isn't set or is unknown (perhaps we should only check if unset or empty?) Skip the test if TERM isn't set. This seems to occur when running under buildbot and presumably cron. For some more info check here: http://mail.python.org/pipermail/python-checkins/2006-January/048704.html Modified: python/branches/release24-maint/Lib/test/test_curses.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_curses.py (original) +++ python/branches/release24-maint/Lib/test/test_curses.py Fri Jan 6 05:40:20 2006 @@ -14,10 +14,13 @@ # 'curses' resource be given on the regrtest command line using the -u # option. If not available, nothing after this line will be executed. 
-from test import test_support -test_support.requires('curses') -if not os.isatty(sys.stdin.fileno()): - raise test_support.TestSkipped, "stdin is not a tty" +from test.test_support import requires, TestSkipped +requires('curses') + +# XXX: if newterm was supported we could use it instead of initscr and not exit +term = os.environ.get('TERM') +if not term or term == 'unknown': + raise TestSkipped, "$TERM=%r, calling initscr() may cause exit" % term def window_funcs(stdscr): "Test the methods of windows" From nnorwitz at gmail.com Fri Jan 6 05:40:35 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Thu, 5 Jan 2006 20:40:35 -0800 Subject: [Python-checkins] commit of r41907 - python/trunk/Makefile.pre.in In-Reply-To: <43BDC737.2000508@v.loewis.de> References: <20060103143057.4DB8E1E401C@bag.python.org> <1136406410.10342.36.camel@geddy.wooz.org> <43BC44CE.8070000@acm.org> <17340.63817.672805.755128@montanaro.dyndns.org> <17340.64259.356277.742037@montanaro.dyndns.org> <43BD703A.8040104@v.loewis.de> <17341.44448.703814.347180@montanaro.dyndns.org> <43BDB8D5.1050400@v.loewis.de> <43BDC737.2000508@v.loewis.de> Message-ID: On 1/5/06, "Martin v. Löwis" wrote: > Martin v. Löwis wrote: > > So we would rather need an ncurses guru, then, to find out why > > this happens and how it could be avoided. It looks like we have one. ;-) > I just checked ncurses and POSIX, and both say that initscr > writes an error message and exits if the terminal cannot be > initialized. I check for $TERM and that seems to fix the problem. I don't know what else to do. newterm() isn't supported by the curses modules.
n From python-checkins at python.org Fri Jan 6 06:25:04 2006 From: python-checkins at python.org (neal.norwitz) Date: Fri, 6 Jan 2006 06:25:04 +0100 (CET) Subject: [Python-checkins] r41938 - in python/branches/release24-maint/Lib: distutils encodings Message-ID: <20060106052504.6CA2E1E4002@bag.python.org> Author: neal.norwitz Date: Fri Jan 6 06:25:01 2006 New Revision: 41938 Modified: python/branches/release24-maint/Lib/ (props changed) python/branches/release24-maint/Lib/distutils/ (props changed) python/branches/release24-maint/Lib/encodings/ (props changed) Log: Ignore pyo files From jimjjewett at gmail.com Fri Jan 6 19:32:17 2006 From: jimjjewett at gmail.com (Jim Jewett) Date: Fri, 6 Jan 2006 13:32:17 -0500 Subject: [Python-checkins] r41929 - in python/trunk: Doc/lib/libsys.tex Include/patchlevel.h Include/pythonrun.h Modules/getbuildinfo.c Python/sysmodule.c In-Reply-To: <20060105233857.4094A1E4002@bag.python.org> References: <20060105233857.4094A1E4002@bag.python.org> Message-ID: minor typo -- " the a " -> " a " > + \var{branch} is the a string of one of the forms \code{'trunk'}, On 1/5/06, martin.v.loewis wrote: > Author: martin.v.loewis > Date: Fri Jan 6 00:38:54 2006 > New Revision: 41929 > > Modified: > python/trunk/Doc/lib/libsys.tex > python/trunk/Include/patchlevel.h > python/trunk/Include/pythonrun.h > python/trunk/Modules/getbuildinfo.c > python/trunk/Python/sysmodule.c > Log: > Drop sys.build_number. Add sys.subversion. > > > Modified: python/trunk/Doc/lib/libsys.tex > ============================================================================== > --- python/trunk/Doc/lib/libsys.tex (original) > +++ python/trunk/Doc/lib/libsys.tex Fri Jan 6 00:38:54 2006 > @@ -27,10 +27,19 @@ > \versionadded{2.0} > \end{datadesc} > > -\begin{datadesc}{build_number} > - A string representing the Subversion revision that this Python executable > - was built from. 
This number is a string because it may contain a trailing > - 'M' if Python was built from a mixed revision source tree. > +\begin{datadesc}{subversion} > + A triple (repo, branch, version) representing the Subversion > + information of the Python interpreter. > + \var{repo} is the name of the repository, \code{'CPython'}. > + \var{branch} is the a string of one of the forms \code{'trunk'}, > + \code{'branches/name'} or \code{'tags/name'}. > + \var{version} is the output of \code{svnversion}, if the > + interpreter was built from a Subversion checkout; it contains > + the revision number (range) and possibly a trailing 'M' if > + there were local modifications. If the tree was exported > + (or svnversion was not available), it is the revision of > + \code{Include/patchlevel.h} if the branch is a tag. Otherwise, > + it is \code{None}. > \versionadded{2.5} > \end{datadesc} From python-checkins at python.org Fri Jan 6 20:26:49 2006 From: python-checkins at python.org (reinhold.birkenfeld) Date: Fri, 6 Jan 2006 20:26:49 +0100 (CET) Subject: [Python-checkins] r41939 - python/trunk/Doc/lib/libsys.tex Message-ID: <20060106192649.6A7631E4083@bag.python.org> Author: reinhold.birkenfeld Date: Fri Jan 6 20:26:42 2006 New Revision: 41939 Modified: python/trunk/Doc/lib/libsys.tex Log: Correct typo Modified: python/trunk/Doc/lib/libsys.tex ============================================================================== --- python/trunk/Doc/lib/libsys.tex (original) +++ python/trunk/Doc/lib/libsys.tex Fri Jan 6 20:26:42 2006 @@ -31,7 +31,7 @@ A triple (repo, branch, version) representing the Subversion information of the Python interpreter. \var{repo} is the name of the repository, \code{'CPython'}. - \var{branch} is the a string of one of the forms \code{'trunk'}, + \var{branch} is a string of one of the forms \code{'trunk'}, \code{'branches/name'} or \code{'tags/name'}. 
\var{version} is the output of \code{svnversion}, if the interpreter was built from a Subversion checkout; it contains From python-checkins at python.org Fri Jan 6 20:28:20 2006 From: python-checkins at python.org (reinhold.birkenfeld) Date: Fri, 6 Jan 2006 20:28:20 +0100 (CET) Subject: [Python-checkins] r41940 - python/trunk/Lib/test/test_compiler.py Message-ID: <20060106192820.2EF561E400A@bag.python.org> Author: reinhold.birkenfeld Date: Fri Jan 6 20:28:15 2006 New Revision: 41940 Modified: python/trunk/Lib/test/test_compiler.py Log: Add compiler test regarding optional arguments. Modified: python/trunk/Lib/test/test_compiler.py ============================================================================== --- python/trunk/Lib/test/test_compiler.py (original) +++ python/trunk/Lib/test/test_compiler.py Fri Jan 6 20:28:15 2006 @@ -12,7 +12,6 @@ # standard library and its test suite. This doesn't verify # that any of the code is correct, merely the compiler is able # to generate some kind of code for it. - libdir = os.path.dirname(unittest.__file__) testdir = os.path.dirname(test.test_support.__file__) @@ -36,6 +35,10 @@ def testNewClassSyntax(self): compiler.compile("class foo():pass\n\n","","exec") + + def testSyntaxErrors(self): + self.assertRaises(SyntaxError, compiler.compile, + "def foo(a=1,b):pass\n\n", "", "exec") def testLineNo(self): # Test that all nodes except Module have a correct lineno attribute. From reinhold-birkenfeld-nospam at wolke7.net Fri Jan 6 20:28:33 2006 From: reinhold-birkenfeld-nospam at wolke7.net (Reinhold Birkenfeld) Date: Fri, 06 Jan 2006 20:28:33 +0100 Subject: [Python-checkins] r41929 - in python/trunk: Doc/lib/libsys.tex Include/patchlevel.h Include/pythonrun.h Modules/getbuildinfo.c Python/sysmodule.c In-Reply-To: References: <20060105233857.4094A1E4002@bag.python.org> Message-ID: Fixed. 
Reinhold Jim Jewett wrote: > minor typo -- " the a " -> " a " > >> + \var{branch} is the a string of one of the forms \code{'trunk'}, > > On 1/5/06, martin.v.loewis wrote: >> Author: martin.v.loewis >> Date: Fri Jan 6 00:38:54 2006 >> New Revision: 41929 >> >> Modified: >> python/trunk/Doc/lib/libsys.tex >> python/trunk/Include/patchlevel.h >> python/trunk/Include/pythonrun.h >> python/trunk/Modules/getbuildinfo.c >> python/trunk/Python/sysmodule.c >> Log: >> Drop sys.build_number. Add sys.subversion. >> >> >> Modified: python/trunk/Doc/lib/libsys.tex >> ============================================================================== >> --- python/trunk/Doc/lib/libsys.tex (original) >> +++ python/trunk/Doc/lib/libsys.tex Fri Jan 6 00:38:54 2006 >> @@ -27,10 +27,19 @@ >> \versionadded{2.0} >> \end{datadesc} >> >> -\begin{datadesc}{build_number} >> - A string representing the Subversion revision that this Python executable >> - was built from. This number is a string because it may contain a trailing >> - 'M' if Python was built from a mixed revision source tree. >> +\begin{datadesc}{subversion} >> + A triple (repo, branch, version) representing the Subversion >> + information of the Python interpreter. >> + \var{repo} is the name of the repository, \code{'CPython'}. >> + \var{branch} is the a string of one of the forms \code{'trunk'}, >> + \code{'branches/name'} or \code{'tags/name'}. >> + \var{version} is the output of \code{svnversion}, if the >> + interpreter was built from a Subversion checkout; it contains >> + the revision number (range) and possibly a trailing 'M' if >> + there were local modifications. If the tree was exported >> + (or svnversion was not available), it is the revision of >> + \code{Include/patchlevel.h} if the branch is a tag. Otherwise, >> + it is \code{None}. >> \versionadded{2.5} >> \end{datadesc} -- Mail address is perfectly valid! 
From python-checkins at python.org Fri Jan 6 20:57:38 2006 From: python-checkins at python.org (phillip.eby) Date: Fri, 6 Jan 2006 20:57:38 +0100 (CET) Subject: [Python-checkins] r41941 - in sandbox/trunk/setuptools: setuptools/__init__.py setuptools/command/build_ext.py setuptools/extension.py tests/shlib_test/setup.py Message-ID: <20060106195738.193BE1E4002@bag.python.org> Author: phillip.eby Date: Fri Jan 6 20:57:36 2006 New Revision: 41941 Modified: sandbox/trunk/setuptools/setuptools/__init__.py sandbox/trunk/setuptools/setuptools/command/build_ext.py sandbox/trunk/setuptools/setuptools/extension.py sandbox/trunk/setuptools/tests/shlib_test/setup.py Log: SharedLibrary -> Library. For now, Windows libs get built as shared, and other platforms get static. :( Modified: sandbox/trunk/setuptools/setuptools/__init__.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/__init__.py (original) +++ sandbox/trunk/setuptools/setuptools/__init__.py Fri Jan 6 20:57:36 2006 @@ -1,5 +1,5 @@ """Extensions to the 'distutils' for large or complex distributions""" -from setuptools.extension import Extension, SharedLibrary +from setuptools.extension import Extension, Library from setuptools.dist import Distribution, Feature, _get_unpatched import distutils.core, setuptools.command from setuptools.depends import Require Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Fri Jan 6 20:57:36 2006 @@ -7,7 +7,7 @@ import os, sys from distutils.file_util import copy_file -from setuptools.extension import SharedLibrary +from setuptools.extension import Library from distutils.ccompiler import new_compiler from distutils.sysconfig import customize_compiler @@ -52,7 +52,7 @@ for ext in self.shlibs: 
if self.get_ext_fullname(ext.name)==fullname: fn, ext = os.path.splitext(filename) - return self.shlib_compiler.library_filename(fn,'shared') + return self.shlib_compiler.library_filename(fn,libtype) return filename def initialize_options(self): @@ -63,7 +63,7 @@ def finalize_options(self): _build_ext.finalize_options(self) self.shlibs = [ext for ext in self.extensions or () - if isinstance(ext,SharedLibrary)] + if isinstance(ext,Library)] if self.shlibs: self.setup_shlib_compiler() self.library_dirs.append(self.build_lib) @@ -71,7 +71,7 @@ def build_extension(self, ext): _compiler = self.compiler try: - if isinstance(ext,SharedLibrary): + if isinstance(ext,Library): self.compiler = self.shlib_compiler _build_ext.build_extension(self,ext) finally: @@ -107,22 +107,11 @@ if self.link_objects is not None: compiler.set_link_objects(self.link_objects) - # hack so distutils' build_extension() builds a shared lib instead - # - def link_shared_object(self, objects, output_libname, output_dir=None, - libraries=None, library_dirs=None, runtime_library_dirs=None, - export_symbols=None, debug=0, extra_preargs=None, - extra_postargs=None, build_temp=None, target_lang=None - ): self.link( - self.SHARED_LIBRARY, objects, output_libname, - output_dir, libraries, library_dirs, runtime_library_dirs, - export_symbols, debug, extra_preargs, extra_postargs, - build_temp, target_lang - ) + # hack so distutils' build_extension() builds a library instead compiler.link_shared_object = link_shared_object.__get__(compiler) def get_export_symbols(self, ext): - if isinstance(ext,SharedLibrary): + if isinstance(ext,Library): return ext.export_symbols return _build_ext.get_export_symbols(self,ext) @@ -132,33 +121,44 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - +if os.name=='nt': + # Build shared libraries on Windows + libtype = 'shared' + def link_shared_object(self, objects, output_libname, output_dir=None, + libraries=None, library_dirs=None, runtime_library_dirs=None, + 
export_symbols=None, debug=0, extra_preargs=None, + extra_postargs=None, build_temp=None, target_lang=None + ): self.link( + self.SHARED_LIBRARY, objects, output_libname, + output_dir, libraries, library_dirs, runtime_library_dirs, + export_symbols, debug, extra_preargs, extra_postargs, + build_temp, target_lang + ) +else: + # Build static libraries everywhere else + libtype = 'static' + def link_shared_object(self, objects, output_libname, output_dir=None, + libraries=None, library_dirs=None, runtime_library_dirs=None, + export_symbols=None, debug=0, extra_preargs=None, + extra_postargs=None, build_temp=None, target_lang=None + ): + # XXX we need to either disallow these attrs on Library instances, + # or warn/abort here if set, or something... + #libraries=None, library_dirs=None, runtime_library_dirs=None, + #export_symbols=None, extra_preargs=None, extra_postargs=None, + #build_temp=None + + assert output_dir is None # distutils build_ext doesn't pass this + output_dir,filename = os.path.split(output_libname) + basename, ext = os.path.splitext(filename) + if self.library_filename("x").startswith('lib'): + # strip 'lib' prefix; this is kludgy if some platform uses + # a different prefix + basename = basename[3:] + + self.create_static_lib( + objects, basename, output_dir, debug, target_lang + ) Modified: sandbox/trunk/setuptools/setuptools/extension.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/extension.py (original) +++ sandbox/trunk/setuptools/setuptools/extension.py Fri Jan 6 20:57:36 2006 @@ -25,8 +25,8 @@ sources.append(s) self.sources = sources -class SharedLibrary(Extension): - """Just like a regular Extension, but built as a shared library instead""" +class Library(Extension): + """Just like a regular Extension, but built as a library instead""" import sys, distutils.core, distutils.extension distutils.core.Extension = Extension Modified: 
sandbox/trunk/setuptools/tests/shlib_test/setup.py ============================================================================== --- sandbox/trunk/setuptools/tests/shlib_test/setup.py (original) +++ sandbox/trunk/setuptools/tests/shlib_test/setup.py Fri Jan 6 20:57:36 2006 @@ -1,9 +1,9 @@ -from setuptools import setup, Extension, SharedLibrary +from setuptools import setup, Extension, Library setup( name="shlib_test", ext_modules = [ - SharedLibrary("hellolib", ["hellolib.c"]), + Library("hellolib", ["hellolib.c"]), Extension("hello", ["hello.pyx"], libraries=["hellolib"]) ], test_suite="test_hello.HelloWorldTest", From python-checkins at python.org Sat Jan 7 01:19:59 2006 From: python-checkins at python.org (phillip.eby) Date: Sat, 7 Jan 2006 01:19:59 +0100 (CET) Subject: [Python-checkins] r41942 - in sandbox/trunk/setuptools: EasyInstall.txt pkg_resources.py setuptools/package_index.py Message-ID: <20060107001959.A57161E400A@bag.python.org> Author: phillip.eby Date: Sat Jan 7 01:19:58 2006 New Revision: 41942 Modified: sandbox/trunk/setuptools/EasyInstall.txt sandbox/trunk/setuptools/pkg_resources.py sandbox/trunk/setuptools/setuptools/package_index.py Log: PyPI searches now use the exact spelling of requirements specified on the command line or in a project's ``install_requires``. Previously, a normalized form of the name was used, which could lead to unnecessary full-index searches when a project's name had an underscore (``_``) in it. Modified: sandbox/trunk/setuptools/EasyInstall.txt ============================================================================== --- sandbox/trunk/setuptools/EasyInstall.txt (original) +++ sandbox/trunk/setuptools/EasyInstall.txt Sat Jan 7 01:19:58 2006 @@ -962,6 +962,12 @@ * There's no automatic retry for borked Sourceforge mirrors, which can easily time out or be missing a file. +0.6a10 + * PyPI searches now use the exact spelling of requirements specified on the + command line or in a project's ``install_requires``. 
Previously, a + normalized form of the name was used, which could lead to unnecessary + full-index searches when a project's name had an underscore (``_``) in it. + 0.6a9 * Fixed ``.pth`` file processing picking up nested eggs (i.e. ones inside Modified: sandbox/trunk/setuptools/pkg_resources.py ============================================================================== --- sandbox/trunk/setuptools/pkg_resources.py (original) +++ sandbox/trunk/setuptools/pkg_resources.py Sat Jan 7 01:19:58 2006 @@ -1529,8 +1529,8 @@ LINE_END = re.compile(r"\s*(#.*)?$").match # whitespace and comment CONTINUE = re.compile(r"\s*\\\s*(#.*)?$").match # line continuation -DISTRO = re.compile(r"\s*(\w+)").match # Distribution or option -VERSION = re.compile(r"\s*(<=?|>=?|==|!=)\s*((\w|\.)+)").match # version info +DISTRO = re.compile(r"\s*((\w|-)+)").match # Distribution or option +VERSION = re.compile(r"\s*(<=?|>=?|==|!=)\s*((\w|[-.])+)").match # ver. info COMMA = re.compile(r"\s*,").match # comma between items OBRACKET = re.compile(r"\s*\[").match CBRACKET = re.compile(r"\s*\]").match @@ -1982,7 +1982,7 @@ while not TERMINATOR(line,p): if CONTINUE(line,p): try: - line = lines.next().replace('-','_'); p = 0 + line = lines.next(); p = 0 except StopIteration: raise ValueError( "\\ must not appear on the last nonblank line" @@ -2008,7 +2008,6 @@ return line, p, items for line in lines: - line = line.replace('-','_') match = DISTRO(line) if not match: raise ValueError("Missing distribution spec", line) @@ -2024,8 +2023,8 @@ ) line, p, specs = scan_list(VERSION,LINE_END,line,p,(1,2),"version spec") - specs = [(op,val.replace('_','-')) for op,val in specs] - yield Requirement(project_name.replace('_','-'), specs, extras) + specs = [(op,safe_version(val)) for op,val in specs] + yield Requirement(project_name, specs, extras) def _sort_dists(dists): @@ -2048,9 +2047,11 @@ + class Requirement: def __init__(self, project_name, specs, extras): """DO NOT CALL THIS UNDOCUMENTED METHOD; use 
Requirement.parse()!""" + self.unsafe_name, project_name = project_name, safe_name(project_name) self.project_name, self.key = project_name, project_name.lower() index = [(parse_version(v),state_machine[op],op,v) for op,v in specs] index.sort() @@ -2068,8 +2069,6 @@ if extras: extras = '[%s]' % extras return '%s%s%s' % (self.project_name, extras, specs) - def __repr__(self): return "Requirement.parse(%r)" % str(self) - def __eq__(self,other): return isinstance(other,Requirement) and self.hashCmp==other.hashCmp @@ -2089,9 +2088,12 @@ if last is None: last = True # no rules encountered return last + def __hash__(self): return self.__hash + def __repr__(self): return "Requirement.parse(%r)" % str(self) + #@staticmethod def parse(s): reqs = list(parse_requirements(s)) @@ -2103,7 +2105,6 @@ parse = staticmethod(parse) - state_machine = { # =>< '<' : '--T', @@ -2122,7 +2123,6 @@ return cls.__mro__[1:] return cls.__mro__ - def _find_adapter(registry, ob): """Return an adapter factory for `ob` from `registry`""" for t in _get_mro(getattr(ob, '__class__', type(ob))): Modified: sandbox/trunk/setuptools/setuptools/package_index.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/package_index.py (original) +++ sandbox/trunk/setuptools/setuptools/package_index.py Sat Jan 7 01:19:58 2006 @@ -245,12 +245,16 @@ def find_packages(self, requirement): - self.scan_url(self.index_url + requirement.project_name+'/') + self.scan_url(self.index_url + requirement.unsafe_name+'/') + if not self.package_pages.get(requirement.key): + # Fall back to safe version of the name + self.scan_url(self.index_url + requirement.project_name+'/') + if not self.package_pages.get(requirement.key): # We couldn't find the target package, so search the index page too self.warn( "Couldn't find index page for %r (maybe misspelled?)", - requirement.project_name + requirement.unsafe_name ) if self.index_url not in self.fetched_urls: self.warn( 
@@ -281,10 +285,6 @@ "; possible download problem?" ) - - - - def download(self, spec, tmpdir): """Locate and/or download `spec` to `tmpdir`, returning a local path From python-checkins at python.org Sat Jan 7 04:49:39 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sat, 7 Jan 2006 04:49:39 +0100 (CET) Subject: [Python-checkins] r41943 - in python/branches/ssize_t: Doc/lib/libcookielib.tex Doc/lib/libos.tex Doc/lib/libsys.tex Include/patchlevel.h Include/pythonrun.h Lib/platform.py Lib/test/test_compiler.py Lib/test/test_curses.py Lib/test/test_linuxaudiodev.py Lib/test/test_ossaudiodev.py Makefile.pre.in Misc/NEWS Misc/build.sh Modules/_bsddb.c Modules/getbuildinfo.c Modules/getpath.c Modules/zlib Objects/unicodeobject.c PC/config.c PCbuild/_bsddb.vcproj PCbuild/_socket.vcproj PCbuild/_testcapi.vcproj PCbuild/_tkinter.vcproj PCbuild/bz2.vcproj PCbuild/make_versioninfo.vcproj PCbuild/pcbuild.sln PCbuild/pyexpat.vcproj PCbuild/python.vcproj PCbuild/pythoncore.vcproj PCbuild/pythonw.vcproj PCbuild/readme.txt PCbuild/select.vcproj PCbuild/unicodedata.vcproj PCbuild/w9xpopen.vcproj PCbuild/winsound.vcproj PCbuild/zlib.vcproj Python/dynload_shlib.c Python/sysmodule.c Tools/msi/msi.py configure configure.in Message-ID: <20060107034939.2B6C21E4002@bag.python.org> Author: martin.v.loewis Date: Sat Jan 7 04:49:32 2006 New Revision: 41943 Added: python/branches/ssize_t/Misc/build.sh - copied unchanged from r41942, python/trunk/Misc/build.sh python/branches/ssize_t/Modules/zlib/ - copied from r41942, python/trunk/Modules/zlib/ Removed: python/branches/ssize_t/PCbuild/zlib.vcproj Modified: python/branches/ssize_t/ (props changed) python/branches/ssize_t/Doc/lib/libcookielib.tex python/branches/ssize_t/Doc/lib/libos.tex python/branches/ssize_t/Doc/lib/libsys.tex python/branches/ssize_t/Include/patchlevel.h python/branches/ssize_t/Include/pythonrun.h python/branches/ssize_t/Lib/platform.py python/branches/ssize_t/Lib/test/test_compiler.py 
python/branches/ssize_t/Lib/test/test_curses.py python/branches/ssize_t/Lib/test/test_linuxaudiodev.py python/branches/ssize_t/Lib/test/test_ossaudiodev.py python/branches/ssize_t/Makefile.pre.in python/branches/ssize_t/Misc/NEWS python/branches/ssize_t/Modules/_bsddb.c python/branches/ssize_t/Modules/getbuildinfo.c (contents, props changed) python/branches/ssize_t/Modules/getpath.c python/branches/ssize_t/Objects/unicodeobject.c python/branches/ssize_t/PC/config.c python/branches/ssize_t/PCbuild/_bsddb.vcproj python/branches/ssize_t/PCbuild/_socket.vcproj python/branches/ssize_t/PCbuild/_testcapi.vcproj python/branches/ssize_t/PCbuild/_tkinter.vcproj python/branches/ssize_t/PCbuild/bz2.vcproj python/branches/ssize_t/PCbuild/make_versioninfo.vcproj python/branches/ssize_t/PCbuild/pcbuild.sln python/branches/ssize_t/PCbuild/pyexpat.vcproj python/branches/ssize_t/PCbuild/python.vcproj python/branches/ssize_t/PCbuild/pythoncore.vcproj python/branches/ssize_t/PCbuild/pythonw.vcproj python/branches/ssize_t/PCbuild/readme.txt python/branches/ssize_t/PCbuild/select.vcproj python/branches/ssize_t/PCbuild/unicodedata.vcproj python/branches/ssize_t/PCbuild/w9xpopen.vcproj python/branches/ssize_t/PCbuild/winsound.vcproj python/branches/ssize_t/Python/dynload_shlib.c python/branches/ssize_t/Python/sysmodule.c (contents, props changed) python/branches/ssize_t/Tools/msi/msi.py python/branches/ssize_t/configure python/branches/ssize_t/configure.in Log: Merge with trunk:41942. Modified: python/branches/ssize_t/Doc/lib/libcookielib.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libcookielib.tex (original) +++ python/branches/ssize_t/Doc/lib/libcookielib.tex Sat Jan 7 04:49:32 2006 @@ -36,7 +36,9 @@ \begin{excdesc}{LoadError} Instances of \class{FileCookieJar} raise this exception on failure to -load cookies from a file. +load cookies from a file. 
\note{For backwards-compatibility +with Python 2.4 (which raised an \exception{IOError}), +\exception{LoadError} is a subclass of \exception{IOError}}. \end{excdesc} @@ -273,16 +275,19 @@ Arguments are as for \method{save()}. The named file must be in the format understood by the class, or -\exception{LoadError} will be raised. +\exception{LoadError} will be raised. Also, \exception{IOError} may +be raised, for example if the file does not exist. \note{For +backwards-compatibility with Python 2.4 (which raised +an \exception{IOError}), \exception{LoadError} is a subclass +of \exception{IOError}.} \end{methoddesc} \begin{methoddesc}[FileCookieJar]{revert}{filename=\constant{None}, ignore_discard=\constant{False}, ignore_expires=\constant{False}} Clear all cookies and reload cookies from a saved file. -Raises \exception{cookielib.LoadError} or \exception{IOError} if -reversion is not successful; the object's state will not be altered if -this happens. +\method{revert()} can raise the same exceptions as \method{load()}. +If there is a failure, the object's state will not be altered. \end{methoddesc} \class{FileCookieJar} instances have the following public attributes: @@ -579,7 +584,7 @@ \end{memberdesc} -\subsection{Cookie Objects \label{cookie-jar-objects}} +\subsection{Cookie Objects \label{cookie-objects}} \class{Cookie} instances have Python attributes roughly corresponding to the standard cookie-attributes specified in the various cookie Modified: python/branches/ssize_t/Doc/lib/libos.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libos.tex (original) +++ python/branches/ssize_t/Doc/lib/libos.tex Sat Jan 7 04:49:32 2006 @@ -832,9 +832,9 @@ doesn't open the FIFO --- it just creates the rendezvous point. 
\end{funcdesc} -\begin{funcdesc}{mknod}{path\optional{, mode=0600, device}} +\begin{funcdesc}{mknod}{filename\optional{, mode=0600, device}} Create a filesystem node (file, device special file or named pipe) -named filename. \var{mode} specifies both the permissions to use and +named \var{filename}. \var{mode} specifies both the permissions to use and the type of node to be created, being combined (bitwise OR) with one of S_IFREG, S_IFCHR, S_IFBLK, and S_IFIFO (those constants are available in \module{stat}). For S_IFCHR and S_IFBLK, \var{device} Modified: python/branches/ssize_t/Doc/lib/libsys.tex ============================================================================== --- python/branches/ssize_t/Doc/lib/libsys.tex (original) +++ python/branches/ssize_t/Doc/lib/libsys.tex Sat Jan 7 04:49:32 2006 @@ -27,10 +27,19 @@ \versionadded{2.0} \end{datadesc} -\begin{datadesc}{build_number} - A string representing the Subversion revision that this Python executable - was built from. This number is a string because it may contain a trailing - 'M' if Python was built from a mixed revision source tree. +\begin{datadesc}{subversion} + A triple (repo, branch, version) representing the Subversion + information of the Python interpreter. + \var{repo} is the name of the repository, \code{'CPython'}. + \var{branch} is a string of one of the forms \code{'trunk'}, + \code{'branches/name'} or \code{'tags/name'}. + \var{version} is the output of \code{svnversion}, if the + interpreter was built from a Subversion checkout; it contains + the revision number (range) and possibly a trailing 'M' if + there were local modifications. If the tree was exported + (or svnversion was not available), it is the revision of + \code{Include/patchlevel.h} if the branch is a tag. Otherwise, + it is \code{None}. 
\versionadded{2.5} \end{datadesc} Modified: python/branches/ssize_t/Include/patchlevel.h ============================================================================== --- python/branches/ssize_t/Include/patchlevel.h (original) +++ python/branches/ssize_t/Include/patchlevel.h Sat Jan 7 04:49:32 2006 @@ -28,6 +28,9 @@ /* Version as a string */ #define PY_VERSION "2.5a0" +/* Subversion Revision number of this file (not of the repository) */ +#define PY_PATCHLEVEL_REVISION "$Revision$" + /* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2. Use this for numeric comparisons, e.g. #if PY_VERSION_HEX >= ... */ #define PY_VERSION_HEX ((PY_MAJOR_VERSION << 24) | \ Modified: python/branches/ssize_t/Include/pythonrun.h ============================================================================== --- python/branches/ssize_t/Include/pythonrun.h (original) +++ python/branches/ssize_t/Include/pythonrun.h Sat Jan 7 04:49:32 2006 @@ -108,7 +108,9 @@ PyAPI_FUNC(const char *) Py_GetCopyright(void); PyAPI_FUNC(const char *) Py_GetCompiler(void); PyAPI_FUNC(const char *) Py_GetBuildInfo(void); -PyAPI_FUNC(const char *) Py_GetBuildNumber(void); +PyAPI_FUNC(const char *) _Py_svnversion(void); +PyAPI_FUNC(const char *) Py_SubversionRevision(void); +PyAPI_FUNC(const char *) Py_SubversionShortBranch(void); /* Internal -- various one-time initializations */ PyAPI_FUNC(PyObject *) _PyBuiltin_Init(void); Modified: python/branches/ssize_t/Lib/platform.py ============================================================================== --- python/branches/ssize_t/Lib/platform.py (original) +++ python/branches/ssize_t/Lib/platform.py Sat Jan 7 04:49:32 2006 @@ -1092,7 +1092,7 @@ ### Various APIs for extracting information from sys.version _sys_version_parser = re.compile(r'([\w.+]+)\s*' - '\(#?(\d+:?\d+M?),\s*([\w ]+),\s*([\w :]+)\)\s*' + '\(#?([^,]+),\s*([\w ]+),\s*([\w :]+)\)\s*' '\[([^\]]+)\]?') _sys_version_cache = None Modified: 
python/branches/ssize_t/Lib/test/test_compiler.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_compiler.py (original) +++ python/branches/ssize_t/Lib/test/test_compiler.py Sat Jan 7 04:49:32 2006 @@ -12,7 +12,6 @@ # standard library and its test suite. This doesn't verify # that any of the code is correct, merely the compiler is able # to generate some kind of code for it. - libdir = os.path.dirname(unittest.__file__) testdir = os.path.dirname(test.test_support.__file__) @@ -36,6 +35,10 @@ def testNewClassSyntax(self): compiler.compile("class foo():pass\n\n","","exec") + + def testSyntaxErrors(self): + self.assertRaises(SyntaxError, compiler.compile, + "def foo(a=1,b):pass\n\n", "", "exec") def testLineNo(self): # Test that all nodes except Module have a correct lineno attribute. Modified: python/branches/ssize_t/Lib/test/test_curses.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_curses.py (original) +++ python/branches/ssize_t/Lib/test/test_curses.py Sat Jan 7 04:49:32 2006 @@ -8,14 +8,19 @@ # getmouse(), ungetmouse(), init_color() # -import curses, sys, tempfile +import curses, sys, tempfile, os # Optionally test curses module. This currently requires that the # 'curses' resource be given on the regrtest command line using the -u # option. If not available, nothing after this line will be executed. 
-from test import test_support -test_support.requires('curses') +from test.test_support import requires, TestSkipped +requires('curses') + +# XXX: if newterm was supported we could use it instead of initscr and not exit +term = os.environ.get('TERM') +if not term or term == 'unknown': + raise TestSkipped, "$TERM=%r, calling initscr() may cause exit" % term def window_funcs(stdscr): "Test the methods of windows" Modified: python/branches/ssize_t/Lib/test/test_linuxaudiodev.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_linuxaudiodev.py (original) +++ python/branches/ssize_t/Lib/test/test_linuxaudiodev.py Sat Jan 7 04:49:32 2006 @@ -28,7 +28,7 @@ try: a = linuxaudiodev.open('w') except linuxaudiodev.error, msg: - if msg[0] in (errno.EACCES, errno.ENODEV, errno.EBUSY): + if msg[0] in (errno.EACCES, errno.ENOENT, errno.ENODEV, errno.EBUSY): raise TestSkipped, msg raise TestFailed, msg Modified: python/branches/ssize_t/Lib/test/test_ossaudiodev.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_ossaudiodev.py (original) +++ python/branches/ssize_t/Lib/test/test_ossaudiodev.py Sat Jan 7 04:49:32 2006 @@ -45,7 +45,7 @@ try: dsp = ossaudiodev.open('w') except IOError, msg: - if msg[0] in (errno.EACCES, errno.ENODEV, errno.EBUSY): + if msg[0] in (errno.EACCES, errno.ENOENT, errno.ENODEV, errno.EBUSY): raise TestSkipped, msg raise TestFailed, msg Modified: python/branches/ssize_t/Makefile.pre.in ============================================================================== --- python/branches/ssize_t/Makefile.pre.in (original) +++ python/branches/ssize_t/Makefile.pre.in Sat Jan 7 04:49:32 2006 @@ -33,6 +33,7 @@ LINKCC= @LINKCC@ AR= @AR@ RANLIB= @RANLIB@ +SVNVERSION= @SVNVERSION@ # Shell used by make (some versions default to the login shell, which is bad) SHELL= /bin/sh @@ -341,21 +342,6 @@ *) $(RUNSHARED) CC='$(CC)' 
LDSHARED='$(BLDSHARED)' OPT='$(OPT)' ./$(BUILDPYTHON) -E $(srcdir)/setup.py build;; \ esac -# buildno should really depend on something like LIBRARY_SRC -buildno: $(PARSER_OBJS) \ - $(OBJECT_OBJS) \ - $(PYTHON_OBJS) \ - $(MODULE_OBJS) \ - $(SIGNAL_OBJS) \ - $(MODOBJS) \ - $(srcdir)/Modules/getbuildinfo.c - if test -d $(srcdir)/.svn; then \ - svnversion $(srcdir) >buildno; \ - elif test -f buildno; then \ - expr `cat buildno` + 1 >buildno1; \ - mv -f buildno1 buildno; \ - else echo 1 >buildno; fi - # Build static library # avoid long command lines, same as LIBRARY_OBJS $(LIBRARY): $(LIBRARY_OBJS) @@ -445,8 +431,14 @@ ############################################################################ # Special rules for object files -Modules/getbuildinfo.o: $(srcdir)/Modules/getbuildinfo.c buildno - $(CC) -c $(PY_CFLAGS) -DBUILD=\"`cat buildno`\" -o $@ $(srcdir)/Modules/getbuildinfo.c +Modules/getbuildinfo.o: $(PARSER_OBJS) \ + $(OBJECT_OBJS) \ + $(PYTHON_OBJS) \ + $(MODULE_OBJS) \ + $(SIGNAL_OBJS) \ + $(MODOBJS) \ + $(srcdir)/Modules/getbuildinfo.c + $(CC) -c $(PY_CFLAGS) -DSVNVERSION=\"`LANG=C $(SVNVERSION)`\" -o $@ $(srcdir)/Modules/getbuildinfo.c Modules/getpath.o: $(srcdir)/Modules/getpath.c Makefile $(CC) -c $(PY_CFLAGS) -DPYTHONPATH='"$(PYTHONPATH)"' \ @@ -988,10 +980,10 @@ # Make things extra clean, before making a distribution: # remove all generated files, even Makefile[.pre] +# Keep configure and Python-ast.[ch], it's possible they can't be generated distclean: clobber - -rm -f core Makefile Makefile.pre buildno config.status \ + -rm -f core Makefile Makefile.pre config.status \ Modules/Setup Modules/Setup.local Modules/Setup.config - -rm -f $(AST_H) $(AST_C) find $(srcdir) '(' -name '*.fdc' -o -name '*~' \ -o -name '[@,#]*' -o -name '*.old' \ -o -name '*.orig' -o -name '*.rej' \ Modified: python/branches/ssize_t/Misc/NEWS ============================================================================== --- python/branches/ssize_t/Misc/NEWS (original) +++ 
python/branches/ssize_t/Misc/NEWS Sat Jan 7 04:49:32 2006 @@ -209,6 +209,8 @@ Extension Modules ----------------- +- Fix 64-bit problems in bsddb. + - Patch #1365916: fix some unsafe 64-bit mmap methods. - Bug #1290333: Added a workaround for cjkcodecs' _codecs_cn build @@ -631,6 +633,9 @@ Build ----- +- The sources of zlib are now part of the Python distribution (zlib 1.2.3). + The zlib module is now builtin on Windows. + - Use -xcode=pic32 for CCSHARED on Solaris with SunPro. - Bug #1189330: configure did not correctly determine the necessary Modified: python/branches/ssize_t/Modules/_bsddb.c ============================================================================== --- python/branches/ssize_t/Modules/_bsddb.c (original) +++ python/branches/ssize_t/Modules/_bsddb.c Sat Jan 7 04:49:32 2006 @@ -1522,7 +1522,7 @@ if (self->primaryDBType == DB_RECNO || self->primaryDBType == DB_QUEUE) - pkeyObj = PyInt_FromLong(*(long *)pkey.data); + pkeyObj = PyInt_FromLong(*(int *)pkey.data); else pkeyObj = PyString_FromStringAndSize(pkey.data, pkey.size); @@ -1531,7 +1531,7 @@ PyObject *keyObj; int type = _DB_get_type(self); if (type == DB_RECNO || type == DB_QUEUE) - keyObj = PyInt_FromLong(*(long *)key.data); + keyObj = PyInt_FromLong(*(int *)key.data); else keyObj = PyString_FromStringAndSize(key.data, key.size); retval = Py_BuildValue("OOO", keyObj, pkeyObj, dataObj); @@ -3172,7 +3172,7 @@ if (self->mydb->primaryDBType == DB_RECNO || self->mydb->primaryDBType == DB_QUEUE) - pkeyObj = PyInt_FromLong(*(long *)pkey.data); + pkeyObj = PyInt_FromLong(*(int *)pkey.data); else pkeyObj = PyString_FromStringAndSize(pkey.data, pkey.size); @@ -3181,7 +3181,7 @@ PyObject *keyObj; int type = _DB_get_type(self->mydb); if (type == DB_RECNO || type == DB_QUEUE) - keyObj = PyInt_FromLong(*(long *)key.data); + keyObj = PyInt_FromLong(*(int *)key.data); else keyObj = PyString_FromStringAndSize(key.data, key.size); retval = Py_BuildValue("OOO", keyObj, pkeyObj, dataObj); Modified: 
python/branches/ssize_t/Modules/getbuildinfo.c ============================================================================== --- python/branches/ssize_t/Modules/getbuildinfo.c (original) +++ python/branches/ssize_t/Modules/getbuildinfo.c Sat Jan 7 04:49:32 2006 @@ -20,21 +20,25 @@ #endif #endif -#ifndef BUILD -#define BUILD "0" -#endif - const char * Py_GetBuildInfo(void) { static char buildinfo[50]; + const char *revision = Py_SubversionRevision(); + const char *sep = *revision ? ":" : ""; + const char *branch = Py_SubversionShortBranch(); PyOS_snprintf(buildinfo, sizeof(buildinfo), - "%s, %.20s, %.9s", BUILD, DATE, TIME); + "%s%s%s, %.20s, %.9s", branch, sep, revision, + DATE, TIME); return buildinfo; } const char * -Py_GetBuildNumber(void) +_Py_svnversion(void) { - return BUILD; +#ifdef SVNVERSION + return SVNVERSION; +#else + return "exported"; +#endif } Modified: python/branches/ssize_t/Modules/getpath.c ============================================================================== --- python/branches/ssize_t/Modules/getpath.c (original) +++ python/branches/ssize_t/Modules/getpath.c Sat Jan 7 04:49:32 2006 @@ -381,7 +381,11 @@ NSModule pythonModule; #endif #ifdef __APPLE__ +#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4 uint32_t nsexeclength = MAXPATHLEN; +#else + unsigned long nsexeclength = MAXPATHLEN; +#endif #endif /* If there is no slash in the argv0 path, then we have to Modified: python/branches/ssize_t/Objects/unicodeobject.c ============================================================================== --- python/branches/ssize_t/Objects/unicodeobject.c (original) +++ python/branches/ssize_t/Objects/unicodeobject.c Sat Jan 7 04:49:32 2006 @@ -5369,7 +5369,7 @@ return PyBool_FromLong(Py_UNICODE_ISLOWER(*p)); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5403,7 +5403,7 @@ return 
PyBool_FromLong(Py_UNICODE_ISUPPER(*p) != 0); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5440,7 +5440,7 @@ (Py_UNICODE_ISUPPER(*p) != 0)); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5485,7 +5485,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5514,7 +5514,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5543,7 +5543,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5572,7 +5572,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5601,7 +5601,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -5630,7 +5630,7 @@ return PyBool_FromLong(1); /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) + if (PyUnicode_GET_SIZE(self) == 0) return PyBool_FromLong(0); e = p + PyUnicode_GET_SIZE(self); @@ -6467,7 +6467,7 @@ if (i == -1 && PyErr_Occurred()) return NULL; if (i < 0) - i += PyString_GET_SIZE(self); + i += PyUnicode_GET_SIZE(self); return unicode_getitem(self, i); } else if (PySlice_Check(item)) { Py_ssize_t start, stop, 
step, slicelength, cur, i; @@ -6475,7 +6475,7 @@ Py_UNICODE* result_buf; PyObject* result; - if (PySlice_GetIndicesEx((PySliceObject*)item, PyString_GET_SIZE(self), + if (PySlice_GetIndicesEx((PySliceObject*)item, PyUnicode_GET_SIZE(self), &start, &stop, &step, &slicelength) < 0) { return NULL; } @@ -6485,6 +6485,9 @@ } else { source_buf = PyUnicode_AS_UNICODE((PyObject*)self); result_buf = PyMem_MALLOC(slicelength*sizeof(Py_UNICODE)); + + if (result_buf == NULL) + return PyErr_NoMemory(); for (cur = start, i = 0; i < slicelength; cur += step, i++) { result_buf[i] = source_buf[cur]; Modified: python/branches/ssize_t/PC/config.c ============================================================================== --- python/branches/ssize_t/PC/config.c (original) +++ python/branches/ssize_t/PC/config.c Sat Jan 7 04:49:32 2006 @@ -56,6 +56,7 @@ extern void init_winreg(void); extern void initdatetime(void); extern void initfunctional(void); +extern void initzlib(void); extern void init_multibytecodec(void); extern void init_codecs_cn(void); @@ -133,7 +134,8 @@ {"xxsubtype", initxxsubtype}, {"zipimport", initzipimport}, - + {"zlib", initzlib}, + /* CJK codecs */ {"_multibytecodec", init_multibytecodec}, {"_codecs_cn", init_codecs_cn}, Modified: python/branches/ssize_t/PCbuild/_bsddb.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/_bsddb.vcproj (original) +++ python/branches/ssize_t/PCbuild/_bsddb.vcproj Sat Jan 7 04:49:32 2006 @@ -24,10 +24,6 @@ PreprocessorDefinitions="_DEBUG;WIN32;_WINDOWS" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\_bsddb/_bsddb.pch" - AssemblerListingLocation=".\x86-temp-debug\_bsddb/" - ObjectFile=".\x86-temp-debug\_bsddb/" - ProgramDataBaseFileName=".\x86-temp-debug\_bsddb/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -48,13 +44,7 @@ ImportLibrary=".\./_bsddb_d.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> 
+ Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/branches/ssize_t/PCbuild/_socket.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/_socket.vcproj (original) +++ python/branches/ssize_t/PCbuild/_socket.vcproj Sat Jan 7 04:49:32 2006 @@ -24,10 +24,6 @@ PreprocessorDefinitions="_DEBUG;WIN32;_WINDOWS" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\_socket/_socket.pch" - AssemblerListingLocation=".\x86-temp-debug\_socket/" - ObjectFile=".\x86-temp-debug\_socket/" - ProgramDataBaseFileName=".\x86-temp-debug\_socket/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -47,13 +43,7 @@ ImportLibrary=".\./_socket_d.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/branches/ssize_t/PCbuild/_testcapi.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/_testcapi.vcproj (original) +++ python/branches/ssize_t/PCbuild/_testcapi.vcproj Sat Jan 7 04:49:32 2006 @@ -28,10 +28,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\_testcapi/_testcapi.pch" - AssemblerListingLocation=".\x86-temp-release\_testcapi/" - ObjectFile=".\x86-temp-release\_testcapi/" - ProgramDataBaseFileName=".\x86-temp-release\_testcapi/" WarningLevel="3" SuppressStartupBanner="TRUE" CompileAs="0"/> @@ -47,13 +43,7 @@ ImportLibrary=".\./_testcapi.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + 
Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/branches/ssize_t/PCbuild/_tkinter.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/_tkinter.vcproj (original) +++ python/branches/ssize_t/PCbuild/_tkinter.vcproj Sat Jan 7 04:49:32 2006 @@ -27,10 +27,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\_tkinter/_tkinter.pch" - AssemblerListingLocation=".\x86-temp-release\_tkinter/" - ObjectFile=".\x86-temp-release\_tkinter/" - ProgramDataBaseFileName=".\x86-temp-release\_tkinter/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -51,13 +47,7 @@ ImportLibrary=".\./_tkinter.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - - - - - - - - - - Modified: python/branches/ssize_t/PCbuild/bz2.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/bz2.vcproj (original) +++ python/branches/ssize_t/PCbuild/bz2.vcproj Sat Jan 7 04:49:32 2006 @@ -25,10 +25,6 @@ PreprocessorDefinitions="_DEBUG;WIN32;_WINDOWS" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\bz2/bz2.pch" - AssemblerListingLocation=".\x86-temp-debug\bz2/" - ObjectFile=".\x86-temp-debug\bz2/" - ProgramDataBaseFileName=".\x86-temp-debug\bz2/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -49,13 +45,7 @@ ImportLibrary=".\./bz2_d.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/branches/ssize_t/PCbuild/make_versioninfo.vcproj 
============================================================================== --- python/branches/ssize_t/PCbuild/make_versioninfo.vcproj (original) +++ python/branches/ssize_t/PCbuild/make_versioninfo.vcproj Sat Jan 7 04:49:32 2006 @@ -29,10 +29,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\make_versioninfo/make_versioninfo.pch" - AssemblerListingLocation=".\x86-temp-release\make_versioninfo/" - ObjectFile=".\x86-temp-release\make_versioninfo/" - ProgramDataBaseFileName=".\x86-temp-release\make_versioninfo/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -54,9 +50,7 @@ BaseAddress="0x1d000000" TargetMachine="1"/> + Name="VCMIDLTool"/> @@ -65,9 +59,7 @@ + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> @@ -131,10 +117,7 @@ + Name="VCResourceCompilerTool"/> - - - - - - Modified: python/branches/ssize_t/PCbuild/pcbuild.sln ============================================================================== Binary files. No diff available. 
Modified: python/branches/ssize_t/PCbuild/pyexpat.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/pyexpat.vcproj (original) +++ python/branches/ssize_t/PCbuild/pyexpat.vcproj Sat Jan 7 04:49:32 2006 @@ -24,10 +24,6 @@ PreprocessorDefinitions="_DEBUG;HAVE_EXPAT_H;WIN32;_WINDOWS;XML_NS;XML_DTD;BYTEORDER=1234;XML_CONTEXT_BYTES=1024;XML_STATIC;HAVE_MEMMOVE" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\pyexpat/pyexpat.pch" - AssemblerListingLocation=".\x86-temp-debug\pyexpat/" - ObjectFile=".\x86-temp-debug\pyexpat/" - ProgramDataBaseFileName=".\x86-temp-debug\pyexpat/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -47,13 +43,7 @@ ImportLibrary=".\./pyexpat_d.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Modified: python/branches/ssize_t/PCbuild/python.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/python.vcproj (original) +++ python/branches/ssize_t/PCbuild/python.vcproj Sat Jan 7 04:49:32 2006 @@ -28,10 +28,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\python/python.pch" - AssemblerListingLocation=".\x86-temp-release\python/" - ObjectFile=".\x86-temp-release\python/" - ProgramDataBaseFileName=".\x86-temp-release\python/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -51,9 +47,7 @@ BaseAddress="0x1d000000" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> - - - - - - - - - - - - - - - - - - Modified: python/branches/ssize_t/PCbuild/pythoncore.vcproj 
============================================================================== --- python/branches/ssize_t/PCbuild/pythoncore.vcproj (original) +++ python/branches/ssize_t/PCbuild/pythoncore.vcproj Sat Jan 7 04:49:32 2006 @@ -29,10 +29,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\pythoncore/pythoncore.pch" - AssemblerListingLocation=".\x86-temp-release\pythoncore/" - ObjectFile=".\x86-temp-release\pythoncore/" - ProgramDataBaseFileName=".\x86-temp-release\pythoncore/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -52,13 +48,7 @@ ImportLibrary=".\./python25.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -249,30 +280,6 @@ - - - - - - - - - @@ -282,84 +289,12 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - Modified: python/branches/ssize_t/PCbuild/pythonw.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/pythonw.vcproj (original) +++ python/branches/ssize_t/PCbuild/pythonw.vcproj Sat Jan 7 04:49:32 2006 @@ -24,10 +24,6 @@ PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\pythonw/pythonw.pch" - AssemblerListingLocation=".\x86-temp-debug\pythonw/" - ObjectFile=".\x86-temp-debug\pythonw/" - ProgramDataBaseFileName=".\x86-temp-debug\pythonw/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -46,13 +42,7 @@ BaseAddress="0x1d000000" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> + Name="VCMIDLTool"/> - - - - - - - - - - - - - - - - - - Modified: python/branches/ssize_t/PCbuild/readme.txt ============================================================================== --- python/branches/ssize_t/PCbuild/readme.txt (original) +++ 
python/branches/ssize_t/PCbuild/readme.txt Sat Jan 7 04:49:32 2006 @@ -124,21 +124,6 @@ nmake -f makefile.vc nmake -f makefile.vc install -zlib - Python wrapper for the zlib compression library. Get the source code - for version 1.2.3 from a convenient mirror at: - http://www.gzip.org/zlib/ - Unpack into dist\zlib-1.2.3. - A custom pre-link step in the zlib project settings should manage to - build zlib-1.2.3\zlib.lib by magic before zlib.pyd (or zlib_d.pyd) is - linked in PCbuild\. - However, the zlib project is not smart enough to remove anything under - zlib-1.2.3\ when you do a clean, so if you want to rebuild zlib.lib - you need to clean up zlib-1.2.3\ by hand. - When building zlib.pyd for Itanium, the pre-link build step won't work, - so you will need to invoke nmake manually, using an IA64 build - environment. - bz2 Python wrapper for the libbz2 compression library. Homepage http://sources.redhat.com/bzip2/ Modified: python/branches/ssize_t/PCbuild/select.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/select.vcproj (original) +++ python/branches/ssize_t/PCbuild/select.vcproj Sat Jan 7 04:49:32 2006 @@ -27,10 +27,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\select/select.pch" - AssemblerListingLocation=".\x86-temp-release\select/" - ObjectFile=".\x86-temp-release\select/" - ProgramDataBaseFileName=".\x86-temp-release\select/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -51,13 +47,7 @@ ImportLibrary=".\./select.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/branches/ssize_t/PCbuild/unicodedata.vcproj ============================================================================== --- 
python/branches/ssize_t/PCbuild/unicodedata.vcproj (original) +++ python/branches/ssize_t/PCbuild/unicodedata.vcproj Sat Jan 7 04:49:32 2006 @@ -28,10 +28,6 @@ RuntimeLibrary="2" EnableFunctionLevelLinking="TRUE" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-release\unicodedata/unicodedata.pch" - AssemblerListingLocation=".\x86-temp-release\unicodedata/" - ObjectFile=".\x86-temp-release\unicodedata/" - ProgramDataBaseFileName=".\x86-temp-release\unicodedata/" WarningLevel="3" SuppressStartupBanner="TRUE" CompileAs="0"/> @@ -47,13 +43,7 @@ ImportLibrary=".\./unicodedata.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Modified: python/branches/ssize_t/PCbuild/w9xpopen.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/w9xpopen.vcproj (original) +++ python/branches/ssize_t/PCbuild/w9xpopen.vcproj Sat Jan 7 04:49:32 2006 @@ -25,10 +25,6 @@ BasicRuntimeChecks="3" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\w9xpopen/w9xpopen.pch" - AssemblerListingLocation=".\x86-temp-debug\w9xpopen/" - ObjectFile=".\x86-temp-debug\w9xpopen/" - ProgramDataBaseFileName=".\x86-temp-debug\w9xpopen/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3"/> @@ -43,13 +39,7 @@ ProgramDatabaseFile=".\./w9xpopen_d.pdb" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - Modified: python/branches/ssize_t/PCbuild/winsound.vcproj ============================================================================== --- python/branches/ssize_t/PCbuild/winsound.vcproj (original) +++ python/branches/ssize_t/PCbuild/winsound.vcproj Sat Jan 7 04:49:32 2006 @@ -26,10 +26,6 @@ BasicRuntimeChecks="3" RuntimeLibrary="3" 
UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\winsound/winsound.pch" - AssemblerListingLocation=".\x86-temp-debug\winsound/" - ObjectFile=".\x86-temp-debug\winsound/" - ProgramDataBaseFileName=".\x86-temp-debug\winsound/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -48,13 +44,7 @@ ImportLibrary=".\./winsound_d.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> @@ -114,13 +98,7 @@ ImportLibrary=".\./winsound.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> - - - - - - - - - Deleted: /python/branches/ssize_t/PCbuild/zlib.vcproj ============================================================================== --- /python/branches/ssize_t/PCbuild/zlib.vcproj Sat Jan 7 04:49:32 2006 +++ (empty file) @@ -1,245 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Modified: python/branches/ssize_t/Python/dynload_shlib.c ============================================================================== --- python/branches/ssize_t/Python/dynload_shlib.c (original) +++ python/branches/ssize_t/Python/dynload_shlib.c Sat Jan 7 04:49:32 2006 @@ -130,7 +130,7 @@ handle = dlopen(pathname, dlopenflags); if (handle == NULL) { - char *error = dlerror(); + const char *error = dlerror(); if (error == NULL) error = "unknown dlopen() error"; PyErr_SetString(PyExc_ImportError, error); Modified: python/branches/ssize_t/Python/sysmodule.c ============================================================================== --- python/branches/ssize_t/Python/sysmodule.c (original) +++ python/branches/ssize_t/Python/sysmodule.c Sat Jan 7 04:49:32 2006 @@ -457,16 +457,16 @@ tstate->interp->tscdump = 0; Py_INCREF(Py_None); return Py_None; - + } -PyDoc_STRVAR(settscdump_doc, +PyDoc_STRVAR(settscdump_doc, "settscdump(bool)\n\ \n\ If true, tell the Python interpreter 
to dump VM measurements to\n\ stderr. If false, turn off dump. The measurements are based on the\n\ processor's time-stamp counter." -); +); #endif /* TSC */ static PyObject * @@ -476,8 +476,8 @@ if (!PyArg_ParseTuple(args, "i:setrecursionlimit", &new_limit)) return NULL; if (new_limit <= 0) { - PyErr_SetString(PyExc_ValueError, - "recursion limit must be positive"); + PyErr_SetString(PyExc_ValueError, + "recursion limit must be positive"); return NULL; } Py_SetRecursionLimit(new_limit); @@ -713,7 +713,7 @@ static PyMethodDef sys_methods[] = { /* Might as well keep this in alphabetic order */ - {"callstats", (PyCFunction)PyEval_GetCallStats, METH_NOARGS, + {"callstats", (PyCFunction)PyEval_GetCallStats, METH_NOARGS, callstats_doc}, {"displayhook", sys_displayhook, METH_O, displayhook_doc}, {"exc_info", sys_exc_info, METH_NOARGS, exc_info_doc}, @@ -721,11 +721,11 @@ {"excepthook", sys_excepthook, METH_VARARGS, excepthook_doc}, {"exit", sys_exit, METH_VARARGS, exit_doc}, #ifdef Py_USING_UNICODE - {"getdefaultencoding", (PyCFunction)sys_getdefaultencoding, - METH_NOARGS, getdefaultencoding_doc}, + {"getdefaultencoding", (PyCFunction)sys_getdefaultencoding, + METH_NOARGS, getdefaultencoding_doc}, #endif #ifdef HAVE_DLOPEN - {"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS, + {"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS, getdlopenflags_doc}, #endif #ifdef COUNT_ALLOCS @@ -736,7 +736,7 @@ #endif #ifdef Py_USING_UNICODE {"getfilesystemencoding", (PyCFunction)sys_getfilesystemencoding, - METH_NOARGS, getfilesystemencoding_doc}, + METH_NOARGS, getfilesystemencoding_doc}, #endif #ifdef Py_TRACE_REFS {"getobjects", _Py_GetObjects, METH_VARARGS}, @@ -757,14 +757,14 @@ #endif #ifdef Py_USING_UNICODE {"setdefaultencoding", sys_setdefaultencoding, METH_VARARGS, - setdefaultencoding_doc}, + setdefaultencoding_doc}, #endif {"setcheckinterval", sys_setcheckinterval, METH_VARARGS, - setcheckinterval_doc}, + setcheckinterval_doc}, {"getcheckinterval", 
sys_getcheckinterval, METH_NOARGS, - getcheckinterval_doc}, + getcheckinterval_doc}, #ifdef HAVE_DLOPEN - {"setdlopenflags", sys_setdlopenflags, METH_VARARGS, + {"setdlopenflags", sys_setdlopenflags, METH_VARARGS, setdlopenflags_doc}, #endif {"setprofile", sys_setprofile, METH_O, setprofile_doc}, @@ -934,6 +934,88 @@ return fflush (stream) || prev_fail ? EOF : 0; } +/* Subversion branch and revision management */ +static const char _patchlevel_revision[] = PY_PATCHLEVEL_REVISION; +static const char headurl[] = "$HeadURL$"; +static int svn_initialized; +static char patchlevel_revision[50]; /* Just the number */ +static char branch[50]; +static char shortbranch[50]; +static const char *svn_revision; + +static void +svnversion_init(void) +{ + const char *python, *br_start, *br_end, *br_end2, *svnversion; + int len, istag; + + if (svn_initialized) + return; + + python = strstr(headurl, "/python/"); + if (!python) + Py_FatalError("subversion keywords missing"); + + br_start = python + 8; + br_end = strchr(br_start, '/'); + /* Works even for trunk, + as we are in trunk/Python/sysmodule.c */ + br_end2 = strchr(br_end+1, '/'); + + istag = strncmp(br_start, "tags", 4) == 0; + if (strncmp(br_start, "trunk", 5) == 0) { + strcpy(branch, "trunk"); + strcpy(shortbranch, "trunk"); + + } + else if (istag || strncmp(br_start, "branches", 8) == 0) { + len = br_end2 - br_start; + strncpy(branch, br_start, len); + branch[len] = '\0'; + + len = br_end2 - (br_end + 1); + strncpy(shortbranch, br_end + 1, len); + shortbranch[len] = '\0'; + } + else { + Py_FatalError("bad HeadURL"); + return; + } + + + svnversion = _Py_svnversion(); + if (strcmp(svnversion, "exported") != 0) + svn_revision = svnversion; + else if (istag) { + len = strlen(_patchlevel_revision); + strncpy(patchlevel_revision, _patchlevel_revision + 11, + len - 13); + patchlevel_revision[len - 13] = '\0'; + svn_revision = patchlevel_revision; + } + else + svn_revision = ""; + + svn_initialized = 1; +} + +/* Return svnversion 
output if available. + Else return Revision of patchlevel.h if on branch. + Else return empty string */ +const char* +Py_SubversionRevision() +{ + svnversion_init(); + return svn_revision; +} + +const char* +Py_SubversionShortBranch() +{ + svnversion_init(); + return shortbranch; +} + PyObject * _PySys_Init(void) { @@ -1003,8 +1085,9 @@ PyDict_SetItemString(sysdict, "hexversion", v = PyInt_FromLong(PY_VERSION_HEX)); Py_XDECREF(v); - PyDict_SetItemString(sysdict, "build_number", - v = PyString_FromString(Py_GetBuildNumber())); + svnversion_init(); + v = Py_BuildValue("(ssz)", "CPython", branch, svn_revision); + PyDict_SetItemString(sysdict, "subversion", v); Py_XDECREF(v); /* * These release level checks are mutually exclusive and cover @@ -1086,7 +1169,7 @@ if (warnoptions != NULL) { PyDict_SetItemString(sysdict, "warnoptions", warnoptions); } - + if (PyErr_Occurred()) return NULL; return m; @@ -1098,7 +1181,7 @@ int i, n; char *p; PyObject *v, *w; - + n = 1; p = path; while ((p = strchr(p, delim)) != NULL) { Modified: python/branches/ssize_t/Tools/msi/msi.py ============================================================================== --- python/branches/ssize_t/Tools/msi/msi.py (original) +++ python/branches/ssize_t/Tools/msi/msi.py Sat Jan 7 04:49:32 2006 @@ -103,7 +103,6 @@ 'select.pyd', 'unicodedata.pyd', 'winsound.pyd', - 'zlib.pyd', '_elementtree.pyd', '_bsddb.pyd', '_socket.pyd', @@ -112,15 +111,9 @@ '_tkinter.pyd', ] -if major+minor <= "23": +if major+minor <= "24": extensions.extend([ - '_csv.pyd', - '_sre.pyd', - '_symtable.pyd', - '_winreg.pyd', - 'datetime.pyd' - 'mmap.pyd', - 'parser.pyd', + 'zlib.pyd', ]) # Well-known component UUIDs @@ -907,7 +900,7 @@ pydirs = [(root,"Lib")] while pydirs: parent, dir = pydirs.pop() - if dir == "CVS" or dir.startswith("plat-"): + if dir == ".svn" or dir.startswith("plat-"): continue elif dir in ["lib-tk", "idlelib", "Icons"]: if not have_tcl: @@ -957,9 +950,9 @@ lib.add_file("wininst-6.exe") 
lib.add_file("wininst-7.1.exe") if dir=="data" and parent.physical=="test" and parent.basedir.physical=="email": - # This should contain all non-CVS files listed in CVS + # This should contain all non-.svn files listed in subversion for f in os.listdir(lib.absolute): - if f.endswith(".txt") or f=="CVS":continue + if f.endswith(".txt") or f==".svn":continue if f.endswith(".au") or f.endswith(".gif"): lib.add_file(f) else: Modified: python/branches/ssize_t/configure ============================================================================== --- python/branches/ssize_t/configure (original) +++ python/branches/ssize_t/configure Sat Jan 7 04:49:32 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41731 . +# From configure.in Revision: 41868 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.5. # @@ -312,7 +312,7 @@ # include #endif" -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS VERSION SOVERSION CONFIG_ARGS PYTHONFRAMEWORK PYTHONFRAMEWORKDIR PYTHONFRAMEWORKPREFIX PYTHONFRAMEWORKINSTALLDIR MACHDEP SGI_ABI EXTRAPLATDIR EXTRAMACHDEPPATH CONFIGURE_MACOSX_DEPLOYMENT_TARGET CXX MAINOBJ EXEEXT CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC OBJEXT CPP EGREP BUILDEXEEXT LIBRARY LDLIBRARY DLLLIBRARY BLDLIBRARY LDLIBRARYDIR INSTSONAME RUNSHARED LINKCC RANLIB ac_ct_RANLIB AR INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN OPT BASECFLAGS OTHER_LIBTOOL_OPT LIBTOOL_CRUFT SO LDSHARED BLDSHARED CCSHARED LINKFORSHARED CFLAGSFORSHARED SHLIBS USE_SIGNAL_MODULE SIGNAL_OBJS USE_THREAD_MODULE LDLAST THREADOBJ DLINCLDIR DYNLOADFILE MACHDEP_OBJS TRUE LIBOBJS HAVE_GETHOSTBYNAME_R_6_ARG HAVE_GETHOSTBYNAME_R_5_ARG 
HAVE_GETHOSTBYNAME_R_3_ARG HAVE_GETHOSTBYNAME_R HAVE_GETHOSTBYNAME LIBM LIBC UNICODE_OBJS THREADHEADERS SRCDIRS LTLIBOBJS' +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS VERSION SOVERSION CONFIG_ARGS PYTHONFRAMEWORK PYTHONFRAMEWORKDIR PYTHONFRAMEWORKPREFIX PYTHONFRAMEWORKINSTALLDIR MACHDEP SGI_ABI EXTRAPLATDIR EXTRAMACHDEPPATH CONFIGURE_MACOSX_DEPLOYMENT_TARGET CXX MAINOBJ EXEEXT CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC OBJEXT CPP EGREP BUILDEXEEXT LIBRARY LDLIBRARY DLLLIBRARY BLDLIBRARY LDLIBRARYDIR INSTSONAME RUNSHARED LINKCC RANLIB ac_ct_RANLIB AR SVNVERSION INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN OPT BASECFLAGS OTHER_LIBTOOL_OPT LIBTOOL_CRUFT SO LDSHARED BLDSHARED CCSHARED LINKFORSHARED CFLAGSFORSHARED SHLIBS USE_SIGNAL_MODULE SIGNAL_OBJS USE_THREAD_MODULE LDLAST THREADOBJ DLINCLDIR DYNLOADFILE MACHDEP_OBJS TRUE LIBOBJS HAVE_GETHOSTBYNAME_R_6_ARG HAVE_GETHOSTBYNAME_R_5_ARG HAVE_GETHOSTBYNAME_R_3_ARG HAVE_GETHOSTBYNAME_R HAVE_GETHOSTBYNAME LIBM LIBC UNICODE_OBJS THREADHEADERS SRCDIRS LTLIBOBJS' ac_subst_files='' # Initialize some variables set by options. @@ -3575,6 +3575,49 @@ test -n "$AR" || AR="ar" + +# Extract the first word of "svnversion", so it can be a program name with args. +set dummy svnversion; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_SVNVERSION+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$SVNVERSION"; then + ac_cv_prog_SVNVERSION="$SVNVERSION" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_SVNVERSION="found" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +SVNVERSION=$ac_cv_prog_SVNVERSION +if test -n "$SVNVERSION"; then + echo "$as_me:$LINENO: result: $SVNVERSION" >&5 +echo "${ECHO_T}$SVNVERSION" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +if test $SVNVERSION = found +then + SVNVERSION="svnversion \$(srcdir)" +else + SVNVERSION="echo exported" +fi + case $MACHDEP in bsdos*|hp*|HP*) # install -d does not work on BSDI or HP-UX @@ -21955,6 +21998,7 @@ s, at RANLIB@,$RANLIB,;t t s, at ac_ct_RANLIB@,$ac_ct_RANLIB,;t t s, at AR@,$AR,;t t +s, at SVNVERSION@,$SVNVERSION,;t t s, at INSTALL_PROGRAM@,$INSTALL_PROGRAM,;t t s, at INSTALL_SCRIPT@,$INSTALL_SCRIPT,;t t s, at INSTALL_DATA@,$INSTALL_DATA,;t t Modified: python/branches/ssize_t/configure.in ============================================================================== --- python/branches/ssize_t/configure.in (original) +++ python/branches/ssize_t/configure.in Sat Jan 7 04:49:32 2006 @@ -618,6 +618,15 @@ AC_SUBST(AR) AC_CHECK_PROGS(AR, ar aal, ar) +AC_SUBST(SVNVERSION) +AC_CHECK_PROG(SVNVERSION, svnversion, found) +if test $SVNVERSION = found +then + SVNVERSION="svnversion \$(srcdir)" +else + SVNVERSION="echo exported" +fi + case $MACHDEP in bsdos*|hp*|HP*) # install -d does not work on BSDI or HP-UX From nnorwitz at gmail.com Sat Jan 7 06:19:46 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Fri, 6 Jan 2006 21:19:46 -0800 Subject: [Python-checkins] r41940 - python/trunk/Lib/test/test_compiler.py In-Reply-To: <20060106192820.2EF561E400A@bag.python.org> References: <20060106192820.2EF561E400A@bag.python.org> Message-ID: This change broke the build. 
See: http://www.python.org/dev/buildbot/ On 1/6/06, reinhold.birkenfeld wrote: > Author: reinhold.birkenfeld > Date: Fri Jan 6 20:28:15 2006 > New Revision: 41940 > > Modified: > python/trunk/Lib/test/test_compiler.py > Log: > Add compiler test regarding optional arguments. > > > > Modified: python/trunk/Lib/test/test_compiler.py > ============================================================================== > --- python/trunk/Lib/test/test_compiler.py (original) > +++ python/trunk/Lib/test/test_compiler.py Fri Jan 6 20:28:15 2006 > @@ -12,7 +12,6 @@ > # standard library and its test suite. This doesn't verify > # that any of the code is correct, merely the compiler is able > # to generate some kind of code for it. > - > libdir = os.path.dirname(unittest.__file__) > testdir = os.path.dirname(test.test_support.__file__) > > @@ -36,6 +35,10 @@ > > def testNewClassSyntax(self): > compiler.compile("class foo():pass\n\n","","exec") > + > + def testSyntaxErrors(self): > + self.assertRaises(SyntaxError, compiler.compile, > + "def foo(a=1,b):pass\n\n", "", "exec") > > def testLineNo(self): > # Test that all nodes except Module have a correct lineno attribute. > _______________________________________________ > Python-checkins mailing list > Python-checkins at python.org > http://mail.python.org/mailman/listinfo/python-checkins > From reinhold-birkenfeld-nospam at wolke7.net Sat Jan 7 09:19:02 2006 From: reinhold-birkenfeld-nospam at wolke7.net (Reinhold Birkenfeld) Date: Sat, 07 Jan 2006 09:19:02 +0100 Subject: [Python-checkins] r41940 - python/trunk/Lib/test/test_compiler.py In-Reply-To: References: <20060106192820.2EF561E400A@bag.python.org> Message-ID: Well, it is not the test that's broken... it's compiler. Reinhold Neal Norwitz wrote: > This change broke the build. 
See: > > http://www.python.org/dev/buildbot/ > > On 1/6/06, reinhold.birkenfeld wrote: >> Author: reinhold.birkenfeld >> Date: Fri Jan 6 20:28:15 2006 >> New Revision: 41940 >> >> Modified: >> python/trunk/Lib/test/test_compiler.py >> Log: >> Add compiler test regarding optional arguments. >> >> >> >> Modified: python/trunk/Lib/test/test_compiler.py >> ============================================================================== >> --- python/trunk/Lib/test/test_compiler.py (original) >> +++ python/trunk/Lib/test/test_compiler.py Fri Jan 6 20:28:15 2006 >> @@ -12,7 +12,6 @@ >> # standard library and its test suite. This doesn't verify >> # that any of the code is correct, merely the compiler is able >> # to generate some kind of code for it. >> - >> libdir = os.path.dirname(unittest.__file__) >> testdir = os.path.dirname(test.test_support.__file__) >> >> @@ -36,6 +35,10 @@ >> >> def testNewClassSyntax(self): >> compiler.compile("class foo():pass\n\n","","exec") >> + >> + def testSyntaxErrors(self): >> + self.assertRaises(SyntaxError, compiler.compile, >> + "def foo(a=1,b):pass\n\n", "", "exec") >> >> def testLineNo(self): >> # Test that all nodes except Module have a correct lineno attribute. >> _______________________________________________ >> Python-checkins mailing list >> Python-checkins at python.org >> http://mail.python.org/mailman/listinfo/python-checkins >> -- Mail address is perfectly valid! From python-checkins at python.org Sat Jan 7 20:08:56 2006 From: python-checkins at python.org (skip.montanaro) Date: Sat, 7 Jan 2006 20:08:56 +0100 (CET) Subject: [Python-checkins] r41944 - python/branches/release24-maint/Lib/test/test__locale.py Message-ID: <20060107190856.40A411E4002@bag.python.org> Author: skip.montanaro Date: Sat Jan 7 20:08:55 2006 New Revision: 41944 Modified: python/branches/release24-maint/Lib/test/test__locale.py Log: Skip this test on Darwin. It's skipped on the trunk and is listed as an expected skip in regrtest.py. Make it so. 
Modified: python/branches/release24-maint/Lib/test/test__locale.py ============================================================================== --- python/branches/release24-maint/Lib/test/test__locale.py (original) +++ python/branches/release24-maint/Lib/test/test__locale.py Sat Jan 7 20:08:55 2006 @@ -2,6 +2,12 @@ from _locale import (setlocale, LC_NUMERIC, RADIXCHAR, THOUSEP, nl_langinfo, localeconv, Error) import unittest +from platform import uname + +if uname()[0] == "Darwin": + maj, min, mic = [int(part) for part in uname()[2].split(".")] + if (maj, min, mic) < (8, 0, 0): + raise TestSkipped("locale support broken for OS X < 10.4") candidate_locales = ['es_UY', 'fr_FR', 'fi_FI', 'es_CO', 'pt_PT', 'it_IT', 'et_EE', 'es_PY', 'no_NO', 'nl_NL', 'lv_LV', 'el_GR', 'be_BY', 'fr_BE', From jimjjewett at gmail.com Sat Jan 7 20:15:26 2006 From: jimjjewett at gmail.com (Jim Jewett) Date: Sat, 7 Jan 2006 14:15:26 -0500 Subject: [Python-checkins] r41940 - python/trunk/Lib/test/test_compiler.py In-Reply-To: References: <20060106192820.2EF561E400A@bag.python.org> Message-ID: On 1/7/06, Reinhold Birkenfeld wrote: > Well, it is not the test that's broken... it's compiler. Maybe. Guido's statement (maybe short of a pronouncement) was that keyword-only arguments were OK in principle, and that *args could follow keywords. It wasn't true yet because no one had put in the work, but it would be an acceptable change. I interpret this to mean that def f(a=1, b): pass should not necessarily raise an error, but I would like to see what it does to def f(a=1, b): print a,b f(b=7) before saying that it is OK. From nnorwitz at gmail.com Sat Jan 7 21:45:27 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Sat, 7 Jan 2006 12:45:27 -0800 Subject: [Python-checkins] Checking in a broken test was: Re: r41940 - python/trunk/Lib/test/test_compiler.py Message-ID: [moving to python-dev] > On 1/7/06, Reinhold Birkenfeld wrote: > > Well, it is not the test that's broken... it's compiler. 
[In reference to: http://mail.python.org/pipermail/python-checkins/2006-January/048715.html] In the past, we haven't checked in tests which are known to be broken. There are several good reasons for this. I would prefer you, 1) also fix the code so the test doesn't fail, 2) revert the change (there's still a bug report open, right?), or 3) generalize tests for known bugs. I strongly prefer #1, but have been thinking about adding #3. There are many open bug reports that fall into two broad categories: incorrect behaviour and crashers. I've been thinking about adding two tests which incorporate these bugs as a way of consolidating where the known problems are. Also, it's great when we have test cases that can be moved to the proper place once the fix has been checked in. I'm proposing something like add two files to Lib/test: outstanding_bugs.py and outstanding_crashes.py. Both would be normal test files with info about the bug report and the code that causes problems. This test in test_compiler should be moved to outstanding_bugs.py. And for a different discussion: On 1/7/06, Jim Jewett wrote: > Maybe. Guido's statement (maybe short of a pronouncement) > was that keyword-only arguments were OK in principle, and > that *args could follow keywords. It wasn't true yet because > no one had put in the work, but it would be an acceptable > change. > > I interpret this to mean that > > def f(a=1, b): pass > > should not necessarily raise an error, but I would like to see what > it does to > > def f(a=1, b): > print a,b > f(b=7) > > before saying that it is OK. 
From python-checkins at python.org Sat Jan 7 22:07:22 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 7 Jan 2006 22:07:22 +0100 (CET) Subject: [Python-checkins] r41945 - python/trunk/configure python/trunk/configure.in python/trunk/pyconfig.h.in Message-ID: <20060107210722.339181E4002@bag.python.org> Author: neal.norwitz Date: Sat Jan 7 22:07:20 2006 New Revision: 41945 Modified: python/trunk/configure python/trunk/configure.in python/trunk/pyconfig.h.in Log: If there is no libtermcap all the readline tests failed. This inappropriately disabled some capabilities which exist. Now test for readline in -lreadline, if not found there, try it in -ltermcap only if not in -lreadline. If both libraries are required, there will need to be more work, but I'm not sure under what conditions both libraries would be required. Modified: python/trunk/configure ============================================================================== --- python/trunk/configure (original) +++ python/trunk/configure Sat Jan 7 22:07:20 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41923 . +# From configure.in Revision: 41924 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.5. # @@ -19694,6 +19694,159 @@ fi +# check where readline lives + +echo "$as_me:$LINENO: checking for readline in -lreadline" >&5 +echo $ECHO_N "checking for readline in -lreadline... $ECHO_C" >&6 +if test "${ac_cv_lib_readline_readline+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. 
*/ +char readline (); +int +main () +{ +readline (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_readline_readline=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_readline_readline=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_readline_readline" >&5 +echo "${ECHO_T}$ac_cv_lib_readline_readline" >&6 +if test $ac_cv_lib_readline_readline = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBREADLINE 1 +_ACEOF + + LIBS="-lreadline $LIBS" + +fi + +if test "$ac_cv_have_readline_readline" = no +then + +echo "$as_me:$LINENO: checking for readline in -ltermcap" >&5 +echo $ECHO_N "checking for readline in -ltermcap... $ECHO_C" >&6 +if test "${ac_cv_lib_termcap_readline+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ltermcap $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. 
*/ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char readline (); +int +main () +{ +readline (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_termcap_readline=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_termcap_readline=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_termcap_readline" >&5 +echo "${ECHO_T}$ac_cv_lib_termcap_readline" >&6 +if test $ac_cv_lib_termcap_readline = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBTERMCAP 1 +_ACEOF + + LIBS="-ltermcap $LIBS" + +fi + +fi + # check for readline 2.1 echo "$as_me:$LINENO: checking for rl_callback_handler_install in -lreadline" >&5 echo $ECHO_N "checking for rl_callback_handler_install in -lreadline... $ECHO_C" >&6 @@ -19701,7 +19854,7 @@ echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS -LIBS="-lreadline -ltermcap $LIBS" +LIBS="-lreadline $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF @@ -19833,7 +19986,7 @@ echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS -LIBS="-lreadline -ltermcap $LIBS" +LIBS="-lreadline $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -19907,7 +20060,7 @@ echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS -LIBS="-lreadline -ltermcap $LIBS" +LIBS="-lreadline $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF Modified: python/trunk/configure.in ============================================================================== --- python/trunk/configure.in (original) +++ python/trunk/configure.in Sat Jan 7 22:07:20 2006 @@ -2871,10 +2871,17 @@ [Define this if you have flockfile(), getc_unlocked(), and funlockfile()]) fi +# check where readline lives +AC_CHECK_LIB(readline, readline) +if test "$ac_cv_have_readline_readline" = no +then + AC_CHECK_LIB(termcap, readline) +fi + # check for readline 2.1 AC_CHECK_LIB(readline, rl_callback_handler_install, AC_DEFINE(HAVE_RL_CALLBACK, 1, - [Define if you have readline 2.1]), , -ltermcap) + [Define if you have readline 2.1]), , ) # check for readline 2.2 AC_TRY_CPP([#include ], @@ -2890,12 +2897,12 @@ # check for readline 4.0 AC_CHECK_LIB(readline, rl_pre_input_hook, AC_DEFINE(HAVE_RL_PRE_INPUT_HOOK, 1, - [Define if you have readline 4.0]), , -ltermcap) + [Define if you have readline 4.0]), , ) # check for readline 4.2 AC_CHECK_LIB(readline, rl_completion_matches, AC_DEFINE(HAVE_RL_COMPLETION_MATCHES, 1, - [Define if you have readline 4.2]), , -ltermcap) + [Define if you have readline 4.2]), , ) # also in readline 4.2 AC_TRY_CPP([#include ], Modified: python/trunk/pyconfig.h.in ============================================================================== --- python/trunk/pyconfig.h.in (original) +++ python/trunk/pyconfig.h.in Sat Jan 7 22:07:20 2006 @@ -275,9 +275,15 @@ /* Define to 1 if you have the header file. */ #undef HAVE_LIBINTL_H +/* Define to 1 if you have the `readline' library (-lreadline). 
*/ +#undef HAVE_LIBREADLINE + /* Define to 1 if you have the `resolv' library (-lresolv). */ #undef HAVE_LIBRESOLV +/* Define to 1 if you have the `termcap' library (-ltermcap). */ +#undef HAVE_LIBTERMCAP + /* Define to 1 if you have the header file. */ #undef HAVE_LIBUTIL_H From python-checkins at python.org Sat Jan 7 22:19:54 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 7 Jan 2006 22:19:54 +0100 (CET) Subject: [Python-checkins] r41946 - python/trunk/Modules/linuxaudiodev.c python/trunk/Modules/ossaudiodev.c Message-ID: <20060107211954.318A61E4002@bag.python.org> Author: neal.norwitz Date: Sat Jan 7 22:19:49 2006 New Revision: 41946 Modified: python/trunk/Modules/linuxaudiodev.c python/trunk/Modules/ossaudiodev.c Log: Fix icc warnings: extra semi-colon Modified: python/trunk/Modules/linuxaudiodev.c ============================================================================== --- python/trunk/Modules/linuxaudiodev.c (original) +++ python/trunk/Modules/linuxaudiodev.c Sat Jan 7 22:19:49 2006 @@ -40,7 +40,7 @@ #endif typedef struct { - PyObject_HEAD; + PyObject_HEAD int x_fd; /* The open file */ int x_mode; /* file mode */ int x_icount; /* Input count */ Modified: python/trunk/Modules/ossaudiodev.c ============================================================================== --- python/trunk/Modules/ossaudiodev.c (original) +++ python/trunk/Modules/ossaudiodev.c Sat Jan 7 22:19:49 2006 @@ -45,7 +45,7 @@ #endif typedef struct { - PyObject_HEAD; + PyObject_HEAD char *devicename; /* name of the device file */ int fd; /* file descriptor */ int mode; /* file mode (O_RDONLY, etc.) 
*/ @@ -55,7 +55,7 @@ } oss_audio_t; typedef struct { - PyObject_HEAD; + PyObject_HEAD int fd; /* The open mixer device */ } oss_mixer_t; From python-checkins at python.org Sat Jan 7 22:20:29 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 7 Jan 2006 22:20:29 +0100 (CET) Subject: [Python-checkins] r41947 - python/trunk/Modules/_hashopenssl.c Message-ID: <20060107212029.9E8831E4032@bag.python.org> Author: neal.norwitz Date: Sat Jan 7 22:20:24 2006 New Revision: 41947 Modified: python/trunk/Modules/_hashopenssl.c Log: Fix icc warnings: extra semi-colon and signed vs unsigned Modified: python/trunk/Modules/_hashopenssl.c ============================================================================== --- python/trunk/Modules/_hashopenssl.c (original) +++ python/trunk/Modules/_hashopenssl.c Sat Jan 7 22:20:24 2006 @@ -33,12 +33,12 @@ static EVP_MD_CTX CONST_new_ ## Name ## _ctx; \ static EVP_MD_CTX *CONST_new_ ## Name ## _ctx_p = NULL; -DEFINE_CONSTS_FOR_NEW(md5); -DEFINE_CONSTS_FOR_NEW(sha1); -DEFINE_CONSTS_FOR_NEW(sha224); -DEFINE_CONSTS_FOR_NEW(sha256); -DEFINE_CONSTS_FOR_NEW(sha384); -DEFINE_CONSTS_FOR_NEW(sha512); +DEFINE_CONSTS_FOR_NEW(md5) +DEFINE_CONSTS_FOR_NEW(sha1) +DEFINE_CONSTS_FOR_NEW(sha224) +DEFINE_CONSTS_FOR_NEW(sha256) +DEFINE_CONSTS_FOR_NEW(sha384) +DEFINE_CONSTS_FOR_NEW(sha512) static EVPobject * @@ -101,7 +101,7 @@ EVP_MD_CTX_copy(&temp_ctx, &self->ctx); digest_size = EVP_MD_CTX_size(&temp_ctx); - EVP_DigestFinal(&temp_ctx, (char *)digest, NULL); + EVP_DigestFinal(&temp_ctx, digest, NULL); retval = PyString_FromStringAndSize((const char *)digest, digest_size); EVP_MD_CTX_cleanup(&temp_ctx); @@ -329,7 +329,7 @@ static PyObject * EVPnew(PyObject *name_obj, const EVP_MD *digest, const EVP_MD_CTX *initial_ctx, - const char *cp, unsigned int len) + const unsigned char *cp, unsigned int len) { EVPobject *self; From python-checkins at python.org Sat Jan 7 22:21:20 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 7 Jan 2006 
22:21:20 +0100 (CET) Subject: [Python-checkins] r41948 - python/trunk/Modules/expat/xmlparse.c Message-ID: <20060107212120.63ACD1E4002@bag.python.org> Author: neal.norwitz Date: Sat Jan 7 22:21:16 2006 New Revision: 41948 Modified: python/trunk/Modules/expat/xmlparse.c Log: Fix icc warnings: using wrong enum type Modified: python/trunk/Modules/expat/xmlparse.c ============================================================================== --- python/trunk/Modules/expat/xmlparse.c (original) +++ python/trunk/Modules/expat/xmlparse.c Sat Jan 7 22:21:16 2006 @@ -1539,7 +1539,7 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { const char *start; - enum XML_Error result = XML_STATUS_OK; + enum XML_Status result = XML_STATUS_OK; switch (parsing) { case XML_SUSPENDED: @@ -1698,7 +1698,7 @@ enum XML_Status XMLCALL XML_ResumeParser(XML_Parser parser) { - enum XML_Error result = XML_STATUS_OK; + enum XML_Status result = XML_STATUS_OK; if (parsing != XML_SUSPENDED) { errorCode = XML_ERROR_NOT_SUSPENDED; From python-checkins at python.org Sat Jan 7 22:23:30 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 7 Jan 2006 22:23:30 +0100 (CET) Subject: [Python-checkins] r41949 - python/trunk/Python/ast.c Message-ID: <20060107212330.EB9441E4002@bag.python.org> Author: neal.norwitz Date: Sat Jan 7 22:23:26 2006 New Revision: 41949 Modified: python/trunk/Python/ast.c Log: Fix icc warnings: shadowing local variables Modified: python/trunk/Python/ast.c ============================================================================== --- python/trunk/Python/ast.c (original) +++ python/trunk/Python/ast.c Sat Jan 7 22:23:26 2006 @@ -252,7 +252,7 @@ if (!stmts) goto error; if (num == 1) { - stmt_ty s = ast_for_stmt(&c, n); + s = ast_for_stmt(&c, n); if (!s) goto error; asdl_seq_SET(stmts, 0, s); @@ -1092,7 +1092,6 @@ return NULL; for (j = 0; j < n_ifs; j++) { - expr_ty expression; REQ(ch, gen_iter); ch = CHILD(ch, 0); REQ(ch, gen_if); From python-checkins at 
python.org Sat Jan 7 22:24:11 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 7 Jan 2006 22:24:11 +0100 (CET) Subject: [Python-checkins] r41950 - python/trunk/Python/compile.c Message-ID: <20060107212411.5BB821E4002@bag.python.org> Author: neal.norwitz Date: Sat Jan 7 22:24:09 2006 New Revision: 41950 Modified: python/trunk/Python/compile.c Log: Fix icc warnings: single bit fields should be unsigned, shadowing local variables Modified: python/trunk/Python/compile.c ============================================================================== --- python/trunk/Python/compile.c (original) +++ python/trunk/Python/compile.c Sat Jan 7 22:24:09 2006 @@ -51,9 +51,9 @@ #define DEFAULT_LNOTAB_SIZE 16 struct instr { - int i_jabs : 1; - int i_jrel : 1; - int i_hasarg : 1; + unsigned i_jabs : 1; + unsigned i_jrel : 1; + unsigned i_hasarg : 1; unsigned char i_opcode; int i_oparg; struct basicblock_ *i_target; /* target block (if jump instruction) */ @@ -74,9 +74,9 @@ block reached by normal control flow. */ struct basicblock_ *b_next; /* b_seen is used to perform a DFS of basicblocks. */ - int b_seen : 1; + unsigned b_seen : 1; /* b_return is true if a RETURN_VALUE opcode is inserted. 
*/ - int b_return : 1; + unsigned b_return : 1; /* depth of stack upon entry of block, computed by stackdepth() */ int b_startdepth; /* instruction offset for block, computed by assemble_jump_offsets() */ @@ -1673,20 +1673,20 @@ } #define VISIT_SEQ(C, TYPE, SEQ) { \ - int i; \ + int _i; \ asdl_seq *seq = (SEQ); /* avoid variable capture */ \ - for (i = 0; i < asdl_seq_LEN(seq); i++) { \ - TYPE ## _ty elt = asdl_seq_GET(seq, i); \ + for (_i = 0; _i < asdl_seq_LEN(seq); _i++) { \ + TYPE ## _ty elt = asdl_seq_GET(seq, _i); \ if (!compiler_visit_ ## TYPE((C), elt)) \ return 0; \ } \ } #define VISIT_SEQ_IN_SCOPE(C, TYPE, SEQ) { \ - int i; \ + int _i; \ asdl_seq *seq = (SEQ); /* avoid variable capture */ \ - for (i = 0; i < asdl_seq_LEN(seq); i++) { \ - TYPE ## _ty elt = asdl_seq_GET(seq, i); \ + for (_i = 0; _i < asdl_seq_LEN(seq); _i++) { \ + TYPE ## _ty elt = asdl_seq_GET(seq, _i); \ if (!compiler_visit_ ## TYPE((C), elt)) { \ compiler_exit_scope(c); \ return 0; \ @@ -3859,7 +3859,7 @@ return 1; if (d_bytecode > 255) { - int i, nbytes, ncodes = d_bytecode / 255; + int j, nbytes, ncodes = d_bytecode / 255; nbytes = a->a_lnotab_off + 2 * ncodes; len = PyString_GET_SIZE(a->a_lnotab); if (nbytes >= len) { @@ -3871,7 +3871,7 @@ return 0; } lnotab = PyString_AS_STRING(a->a_lnotab) + a->a_lnotab_off; - for (i = 0; i < ncodes; i++) { + for (j = 0; j < ncodes; j++) { *lnotab++ = 255; *lnotab++ = 0; } @@ -3880,7 +3880,7 @@ } assert(d_bytecode <= 255); if (d_lineno > 255) { - int i, nbytes, ncodes = d_lineno / 255; + int j, nbytes, ncodes = d_lineno / 255; nbytes = a->a_lnotab_off + 2 * ncodes; len = PyString_GET_SIZE(a->a_lnotab); if (nbytes >= len) { @@ -3895,7 +3895,7 @@ *lnotab++ = 255; *lnotab++ = d_bytecode; d_bytecode = 0; - for (i = 1; i < ncodes; i++) { + for (j = 1; j < ncodes; j++) { *lnotab++ = 255; *lnotab++ = 0; } @@ -4190,7 +4190,7 @@ /* Emit code in reverse postorder from dfs. 
*/ for (i = a.a_nblocks - 1; i >= 0; i--) { - basicblock *b = a.a_postorder[i]; + b = a.a_postorder[i]; for (j = 0; j < b->b_iused; j++) if (!assemble_emit(&a, &b->b_instr[j])) goto error; From python-checkins at python.org Sat Jan 7 22:24:57 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 7 Jan 2006 22:24:57 +0100 (CET) Subject: [Python-checkins] r41951 - python/trunk/Python/symtable.c Message-ID: <20060107212457.2E05A1E4002@bag.python.org> Author: neal.norwitz Date: Sat Jan 7 22:24:54 2006 New Revision: 41951 Modified: python/trunk/Python/symtable.c Log: Fix icc warnings: shadowing local variable (i) and complex is set but not used, so remove Modified: python/trunk/Python/symtable.c ============================================================================== --- python/trunk/Python/symtable.c (original) +++ python/trunk/Python/symtable.c Sat Jan 7 22:24:54 2006 @@ -539,7 +539,7 @@ */ if (class && PyInt_AS_LONG(o) & (DEF_BOUND | DEF_GLOBAL)) { - int i = PyInt_AS_LONG(o) | DEF_FREE_CLASS; + i = PyInt_AS_LONG(o) | DEF_FREE_CLASS; o = PyInt_FromLong(i); if (!o) { Py_DECREF(free_value); @@ -1170,7 +1170,7 @@ static int symtable_visit_params(struct symtable *st, asdl_seq *args, int toplevel) { - int i, complex = 0; + int i; /* go through all the toplevel arguments first */ for (i = 0; i < asdl_seq_LEN(args); i++) { @@ -1183,7 +1183,6 @@ } else if (arg->kind == Tuple_kind) { assert(arg->v.Tuple.ctx == Store); - complex = 1; if (toplevel) { if (!symtable_implicit_arg(st, i)) return 0; From python-checkins at python.org Sat Jan 7 22:25:24 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 7 Jan 2006 22:25:24 +0100 (CET) Subject: [Python-checkins] r41952 - python/trunk/Include/symtable.h Message-ID: <20060107212524.937501E4002@bag.python.org> Author: neal.norwitz Date: Sat Jan 7 22:25:23 2006 New Revision: 41952 Modified: python/trunk/Include/symtable.h Log: Fix icc warnings: single bit fields should be unsigned Modified: 
python/trunk/Include/symtable.h ============================================================================== --- python/trunk/Include/symtable.h (original) +++ python/trunk/Include/symtable.h Sat Jan 7 22:25:23 2006 @@ -31,13 +31,13 @@ PyObject *ste_children; /* list of child ids */ _Py_block_ty ste_type; /* module, class, or function */ int ste_unoptimized; /* false if namespace is optimized */ - int ste_nested : 1; /* true if block is nested */ - int ste_free : 1; /* true if block has free variables */ - int ste_child_free : 1; /* true if a child block has free variables, - including free refs to globals */ - int ste_generator : 1; /* true if namespace is a generator */ - int ste_varargs : 1; /* true if block has varargs */ - int ste_varkeywords : 1; /* true if block has varkeywords */ + unsigned ste_nested : 1; /* true if block is nested */ + unsigned ste_free : 1; /* true if block has free variables */ + unsigned ste_child_free : 1; /* true if a child block has free vars, + including free refs to globals */ + unsigned ste_generator : 1; /* true if namespace is a generator */ + unsigned ste_varargs : 1; /* true if block has varargs */ + unsigned ste_varkeywords : 1; /* true if block has varkeywords */ int ste_lineno; /* first line of block */ int ste_opt_lineno; /* lineno of last exec or import * */ int ste_tmpname; /* counter for listcomp temp vars */ From python-checkins at python.org Sat Jan 7 22:39:29 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 7 Jan 2006 22:39:29 +0100 (CET) Subject: [Python-checkins] r41953 - python/trunk/configure python/trunk/configure.in Message-ID: <20060107213929.96F061E4007@bag.python.org> Author: neal.norwitz Date: Sat Jan 7 22:39:28 2006 New Revision: 41953 Modified: python/trunk/configure python/trunk/configure.in Log: icc accepts -OPT... 
but generates a warning, so just disable the check Modified: python/trunk/configure ============================================================================== --- python/trunk/configure (original) +++ python/trunk/configure Sat Jan 7 22:39:28 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41924 . +# From configure.in Revision: 41925 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.5. # @@ -3911,6 +3911,12 @@ BASECFLAGS="$BASECFLAGS $ac_arch_flags" fi +# disable check for icc since it seems to pass, but generates a warning +if test "$CC" = icc +then + ac_cv_opt_olimit_ok=no +fi + echo "$as_me:$LINENO: checking whether $CC accepts -OPT:Olimit=0" >&5 echo $ECHO_N "checking whether $CC accepts -OPT:Olimit=0... $ECHO_C" >&6 if test "${ac_cv_opt_olimit_ok+set}" = set; then Modified: python/trunk/configure.in ============================================================================== --- python/trunk/configure.in (original) +++ python/trunk/configure.in Sat Jan 7 22:39:28 2006 @@ -775,6 +775,12 @@ BASECFLAGS="$BASECFLAGS $ac_arch_flags" fi +# disable check for icc since it seems to pass, but generates a warning +if test "$CC" = icc +then + ac_cv_opt_olimit_ok=no +fi + AC_MSG_CHECKING(whether $CC accepts -OPT:Olimit=0) AC_CACHE_VAL(ac_cv_opt_olimit_ok, [ac_save_cc="$CC" From python-checkins at python.org Sun Jan 8 00:20:47 2006 From: python-checkins at python.org (tim.peters) Date: Sun, 8 Jan 2006 00:20:47 +0100 (CET) Subject: [Python-checkins] r41954 - python/trunk/Lib/test/test_compiler.py Message-ID: <20060107232047.3EC4D1E4002@bag.python.org> Author: tim.peters Date: Sun Jan 8 00:20:46 2006 New Revision: 41954 Modified: python/trunk/Lib/test/test_compiler.py Log: Revert revision 41940: the test causes -uall to fail everywhere. 
Modified: python/trunk/Lib/test/test_compiler.py ============================================================================== --- python/trunk/Lib/test/test_compiler.py (original) +++ python/trunk/Lib/test/test_compiler.py Sun Jan 8 00:20:46 2006 @@ -12,6 +12,7 @@ # standard library and its test suite. This doesn't verify # that any of the code is correct, merely the compiler is able # to generate some kind of code for it. + libdir = os.path.dirname(unittest.__file__) testdir = os.path.dirname(test.test_support.__file__) @@ -35,10 +36,6 @@ def testNewClassSyntax(self): compiler.compile("class foo():pass\n\n","","exec") - - def testSyntaxErrors(self): - self.assertRaises(SyntaxError, compiler.compile, - "def foo(a=1,b):pass\n\n", "", "exec") def testLineNo(self): # Test that all nodes except Module have a correct lineno attribute. From python-checkins at python.org Sun Jan 8 02:03:37 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 02:03:37 +0100 (CET) Subject: [Python-checkins] r41955 - in python/trunk: Modules/zipimport.c Python/pystrtod.c Message-ID: <20060108010337.5B3A21E4007@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 02:03:36 2006 New Revision: 41955 Modified: python/trunk/Modules/zipimport.c python/trunk/Python/pystrtod.c Log: Fix icc warnings: strlen() returns size_t Modified: python/trunk/Modules/zipimport.c ============================================================================== --- python/trunk/Modules/zipimport.c (original) +++ python/trunk/Modules/zipimport.c Sun Jan 8 02:03:36 2006 @@ -62,7 +62,7 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) { char *path, *p, *prefix, buf[MAXPATHLEN+2]; - int len; + size_t len; if (!_PyArg_NoKeywords("zipimporter()", kwds)) return -1; @@ -231,7 +231,7 @@ static int make_filename(char *prefix, char *name, char *path) { - int len; + size_t len; char *p; len = strlen(prefix); @@ -249,7 +249,7 @@ *p = SEP; } len += strlen(name); - return len; + return 
(int)len; } enum zi_module_info { @@ -659,7 +659,8 @@ FILE *fp; long compress, crc, data_size, file_size, file_offset, date, time; long header_offset, name_size, header_size, header_position; - long i, l, length, count; + long i, l, count; + size_t length; char path[MAXPATHLEN + 5]; char name[MAXPATHLEN + 5]; char *p, endof_central_dir[22]; Modified: python/trunk/Python/pystrtod.c ============================================================================== --- python/trunk/Python/pystrtod.c (original) +++ python/trunk/Python/pystrtod.c Sun Jan 8 02:03:36 2006 @@ -44,7 +44,7 @@ double val = -1.0; struct lconv *locale_data; const char *decimal_point; - int decimal_point_len; + size_t decimal_point_len; const char *p, *decimal_point_pos; const char *end = NULL; /* Silence gcc */ @@ -165,9 +165,8 @@ { struct lconv *locale_data; const char *decimal_point; - int decimal_point_len; + size_t decimal_point_len, rest_len; char *p; - int rest_len; char format_char; /* g_return_val_if_fail (buffer != NULL, NULL); */ From python-checkins at python.org Sun Jan 8 02:06:06 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 02:06:06 +0100 (CET) Subject: [Python-checkins] r41956 - python/trunk/Python/ast.c python/trunk/Python/symtable.c Message-ID: <20060108010606.C5EFB1E4007@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 02:06:06 2006 New Revision: 41956 Modified: python/trunk/Python/ast.c python/trunk/Python/symtable.c Log: Fix icc warnings: conversion from "long" to "int" may lose significant bits Modified: python/trunk/Python/ast.c ============================================================================== --- python/trunk/Python/ast.c (original) +++ python/trunk/Python/ast.c Sun Jan 8 02:06:06 2006 @@ -78,7 +78,7 @@ ast_error_finish(const char *filename) { PyObject *type, *value, *tback, *errstr, *loc, *tmp; - int lineno; + long lineno; assert(PyErr_Occurred()); if (!PyErr_ExceptionMatches(PyExc_SyntaxError)) @@ -101,7 +101,7 @@ 
Py_INCREF(Py_None); loc = Py_None; } - tmp = Py_BuildValue("(ziOO)", filename, lineno, Py_None, loc); + tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc); Py_DECREF(loc); if (!tmp) { Py_DECREF(errstr); @@ -261,7 +261,6 @@ /* Only a simple_stmt can contain multiple statements. */ REQ(n, simple_stmt); for (i = 0; i < NCH(n); i += 2) { - stmt_ty s; if (TYPE(CHILD(n, i)) == NEWLINE) break; s = ast_for_stmt(&c, CHILD(n, i)); @@ -1510,7 +1509,7 @@ return NULL; } - asdl_seq_SET(ops, i / 2, (void *)operator); + asdl_seq_SET(ops, i / 2, (void *)(Py_uintptr_t)operator); asdl_seq_SET(cmps, i / 2, expression); } expression = ast_for_expr(c, CHILD(n, 0)); @@ -2031,7 +2030,7 @@ return alias(NEW_IDENTIFIER(CHILD(n, 0)), NULL, c->c_arena); else { /* Create a string of the form "a.b.c" */ - int i, len; + size_t i, len; char *s; len = 0; Modified: python/trunk/Python/symtable.c ============================================================================== --- python/trunk/Python/symtable.c (original) +++ python/trunk/Python/symtable.c Sun Jan 8 02:06:06 2006 @@ -354,7 +354,7 @@ */ static int -analyze_name(PySTEntryObject *ste, PyObject *dict, PyObject *name, int flags, +analyze_name(PySTEntryObject *ste, PyObject *dict, PyObject *name, long flags, PyObject *bound, PyObject *local, PyObject *free, PyObject *global) { @@ -426,14 +426,14 @@ analyze_cells(PyObject *scope, PyObject *free) { PyObject *name, *v, *w; - int flags, pos = 0, success = 0; + int pos = 0, success = 0; w = PyInt_FromLong(CELL); if (!w) return 0; while (PyDict_Next(scope, &pos, &name, &v)) { assert(PyInt_Check(v)); - flags = PyInt_AS_LONG(v); + long flags = PyInt_AS_LONG(v); if (flags != LOCAL) continue; if (!PyDict_GetItem(free, name)) @@ -506,9 +506,10 @@ PyObject *bound, PyObject *free, int class) { PyObject *name, *v, *u, *w, *free_value = NULL; - int i, flags, pos = 0; + int pos = 0; while (PyDict_Next(symbols, &pos, &name, &v)) { + long i, flags; assert(PyInt_Check(v)); flags = PyInt_AS_LONG(v); w 
= PyDict_GetItem(scope, name); @@ -539,7 +540,7 @@ */ if (class && PyInt_AS_LONG(o) & (DEF_BOUND | DEF_GLOBAL)) { - i = PyInt_AS_LONG(o) | DEF_FREE_CLASS; + long i = PyInt_AS_LONG(o) | DEF_FREE_CLASS; o = PyInt_FromLong(i); if (!o) { Py_DECREF(free_value); @@ -581,7 +582,7 @@ { PyObject *name, *v, *local = NULL, *scope = NULL, *newbound = NULL; PyObject *newglobal = NULL, *newfree = NULL; - int i, flags, pos = 0, success = 0; + int i, pos = 0, success = 0; local = PyDict_New(); if (!local) @@ -614,7 +615,7 @@ assert(PySTEntry_Check(ste)); assert(PyDict_Check(ste->ste_symbols)); while (PyDict_Next(ste->ste_symbols, &pos, &name, &v)) { - flags = PyInt_AS_LONG(v); + long flags = PyInt_AS_LONG(v); if (!analyze_name(ste, scope, name, flags, bound, local, free, global)) goto error; @@ -750,7 +751,7 @@ return 1; } -static int +static long symtable_lookup(struct symtable *st, PyObject *name) { PyObject *o; @@ -769,7 +770,7 @@ { PyObject *o; PyObject *dict; - int val; + long val; PyObject *mangled = _Py_Mangle(st->st_private, name); if (!mangled) @@ -1018,7 +1019,7 @@ for (i = 0; i < asdl_seq_LEN(seq); i++) { identifier name = asdl_seq_GET(seq, i); char *c_name = PyString_AS_STRING(name); - int cur = symtable_lookup(st, name); + long cur = symtable_lookup(st, name); if (cur < 0) return 0; if (cur & (DEF_LOCAL | USE)) { From python-checkins at python.org Sun Jan 8 02:08:10 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 02:08:10 +0100 (CET) Subject: [Python-checkins] r41957 - python/trunk/Modules/_hashopenssl.c Message-ID: <20060108010810.0D3FC1E4002@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 02:08:09 2006 New Revision: 41957 Modified: python/trunk/Modules/_hashopenssl.c Log: Fix icc warnings: HASH_OBJ_CONSTRUCTOR was not defined and using #if HASH_OBJ_CONSTRUCTOR Modified: python/trunk/Modules/_hashopenssl.c ============================================================================== --- python/trunk/Modules/_hashopenssl.c 
(original) +++ python/trunk/Modules/_hashopenssl.c Sun Jan 8 02:08:09 2006 @@ -18,6 +18,10 @@ #include +#ifndef HASH_OBJ_CONSTRUCTOR +#define HASH_OBJ_CONSTRUCTOR 0 +#endif + typedef struct { PyObject_HEAD PyObject *name; /* name of this hash algorithm */ From python-checkins at python.org Sun Jan 8 02:08:56 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 02:08:56 +0100 (CET) Subject: [Python-checkins] r41958 - python/trunk/Modules/_tkinter.c Message-ID: <20060108010856.640C31E4002@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 02:08:55 2006 New Revision: 41958 Modified: python/trunk/Modules/_tkinter.c Log: Fix icc warnings: try to set TCL_UNIX_FD for #if TCL_WIN_SOCKET == TCL_UNIX_FD. res is not used, add a XXX comment Modified: python/trunk/Modules/_tkinter.c ============================================================================== --- python/trunk/Modules/_tkinter.c (original) +++ python/trunk/Modules/_tkinter.c Sun Jan 8 02:08:55 2006 @@ -98,6 +98,16 @@ #ifdef HAVE_CREATEFILEHANDLER +/* This bit is to ensure that TCL_UNIX_FD is defined and doesn't interfere + with the proper calculation of FHANDLETYPE == TCL_UNIX_FD below. */ +#ifndef TCL_UNIX_FD +# ifdef TCL_WIN_SOCKET +# define TCL_UNIX_FD (! TCL_WIN_SOCKET) +# else +# define TCL_UNIX_FD 1 +# endif +#endif + /* Tcl_CreateFileHandler() changed several times; these macros deal with the messiness. In Tcl 8.0 and later, it is not available on Windows (and on Unix, only because Jack added it back); when available on Windows, it only @@ -2619,6 +2629,7 @@ err = Tcl_Eval(Tkapp_Interp(self), "info exists tk_version"); ENTER_OVERLAP if (err == TCL_ERROR) { + /* XXX: shouldn't we do something with res? 
*/ res = Tkinter_Error(self); } else { _tk_exists = Tkapp_Result(self); From python-checkins at python.org Sun Jan 8 02:10:35 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 02:10:35 +0100 (CET) Subject: [Python-checkins] r41959 - python/trunk/Parser/pgen.c Message-ID: <20060108011035.39E241E401E@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 02:10:34 2006 New Revision: 41959 Modified: python/trunk/Parser/pgen.c Log: Fix icc warnings. This couldn't have been correct since i is checked for 2 different values without changing. I think this was the intent. The unused warning only occurs when not building in debug mode. Modified: python/trunk/Parser/pgen.c ============================================================================== --- python/trunk/Parser/pgen.c (original) +++ python/trunk/Parser/pgen.c Sun Jan 8 02:10:34 2006 @@ -276,14 +276,11 @@ static void compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) { - int i; - REQ(n, ATOM); - i = n->n_nchildren; - REQN(i, 1); + REQN(n->n_nchildren, 1); n = n->n_child; if (n->n_type == LPAR) { - REQN(i, 3); + REQN(n->n_nchildren, 3); n++; REQ(n, RHS); compile_rhs(ll, nf, n, pa, pb); From python-checkins at python.org Sun Jan 8 02:11:27 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 02:11:27 +0100 (CET) Subject: [Python-checkins] r41960 - python/trunk/Objects/floatobject.c Message-ID: <20060108011127.97AC31E4031@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 02:11:25 2006 New Revision: 41960 Modified: python/trunk/Objects/floatobject.c Log: Fix icc warnings: remove unused variable Modified: python/trunk/Objects/floatobject.c ============================================================================== --- python/trunk/Objects/floatobject.c (original) +++ python/trunk/Objects/floatobject.c Sun Jan 8 02:11:25 2006 @@ -425,7 +425,6 @@ int vsign = i == 0.0 ? 0 : i < 0.0 ? 
-1 : 1; int wsign = _PyLong_Sign(w); size_t nbits; - double mant; int exponent; if (vsign != wsign) { @@ -471,7 +470,7 @@ op = _Py_SwappedOp[op]; } assert(i > 0.0); - mant = frexp(i, &exponent); + (void) frexp(i, &exponent); /* exponent is the # of bits in v before the radix point; * we know that nbits (the # of bits in w) > 48 at this point */ From python-checkins at python.org Sun Jan 8 02:12:13 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 02:12:13 +0100 (CET) Subject: [Python-checkins] r41961 - python/trunk/Objects/unicodeobject.c Message-ID: <20060108011213.799771E4029@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 02:12:10 2006 New Revision: 41961 Modified: python/trunk/Objects/unicodeobject.c Log: Fix icc warnings: remove (sometimes) unused variable conditionally Modified: python/trunk/Objects/unicodeobject.c ============================================================================== --- python/trunk/Objects/unicodeobject.c (original) +++ python/trunk/Objects/unicodeobject.c Sun Jan 8 02:12:10 2006 @@ -2291,7 +2291,6 @@ int startinpos; int endinpos; int outpos; - Py_UNICODE unimax; PyUnicodeObject *v; Py_UNICODE *p; const char *end; @@ -2299,7 +2298,10 @@ PyObject *errorHandler = NULL; PyObject *exc = NULL; - unimax = PyUnicode_GetMax(); +#ifdef Py_UNICODE_WIDE + Py_UNICODE unimax = PyUnicode_GetMax(); +#endif + v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE); if (v == NULL) goto onError; From python-checkins at python.org Sun Jan 8 02:17:42 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 02:17:42 +0100 (CET) Subject: [Python-checkins] r41962 - python/trunk Message-ID: <20060108011742.651E51E4007@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 02:17:40 2006 New Revision: 41962 Modified: python/trunk/ (props changed) Log: ignore db_home (result of db tests) and autom4te.cache from autoconf From python-checkins at python.org Sun Jan 8 02:18:13 2006 From: python-checkins 
at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 02:18:13 +0100 (CET) Subject: [Python-checkins] r41963 - python/trunk/Lib/test Message-ID: <20060108011813.E09631E4007@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 02:18:12 2006 New Revision: 41963 Modified: python/trunk/Lib/test/ (props changed) Log: ignore db_home (result of db tests) From python-checkins at python.org Sun Jan 8 03:06:02 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 03:06:02 +0100 (CET) Subject: [Python-checkins] r41964 - python/trunk/Parser/pgen.c Message-ID: <20060108020602.8269A1E4002@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 03:06:01 2006 New Revision: 41964 Modified: python/trunk/Parser/pgen.c Log: Revert previous checkin, the check is for <, not ==. i is unused in non-debug builds, but is used in debug builds Modified: python/trunk/Parser/pgen.c ============================================================================== --- python/trunk/Parser/pgen.c (original) +++ python/trunk/Parser/pgen.c Sun Jan 8 03:06:01 2006 @@ -276,11 +276,14 @@ static void compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) { + int i; + REQ(n, ATOM); - REQN(n->n_nchildren, 1); + i = n->n_nchildren; + REQN(i, 1); n = n->n_child; if (n->n_type == LPAR) { - REQN(n->n_nchildren, 3); + REQN(i, 3); n++; REQ(n, RHS); compile_rhs(ll, nf, n, pa, pb); From python-checkins at python.org Sun Jan 8 03:10:40 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 03:10:40 +0100 (CET) Subject: [Python-checkins] r41965 - in python/branches/ssize_t: Include/symtable.h Lib/test Lib/test/test_compiler.py Modules/_hashopenssl.c Modules/_tkinter.c Modules/expat/xmlparse.c Modules/linuxaudiodev.c Modules/ossaudiodev.c Modules/zipimport.c Objects/floatobject.c Objects/unicodeobject.c Python/ast.c Python/compile.c Python/pystrtod.c Python/symtable.c configure configure.in pyconfig.h.in Message-ID: <20060108021040.6B1931E4032@bag.python.org> Author:
neal.norwitz Date: Sun Jan 8 03:10:26 2006 New Revision: 41965 Modified: python/branches/ssize_t/ (props changed) python/branches/ssize_t/Include/symtable.h python/branches/ssize_t/Lib/test/ (props changed) python/branches/ssize_t/Lib/test/test_compiler.py python/branches/ssize_t/Modules/_hashopenssl.c python/branches/ssize_t/Modules/_tkinter.c python/branches/ssize_t/Modules/expat/xmlparse.c python/branches/ssize_t/Modules/linuxaudiodev.c python/branches/ssize_t/Modules/ossaudiodev.c python/branches/ssize_t/Modules/zipimport.c python/branches/ssize_t/Objects/floatobject.c python/branches/ssize_t/Objects/unicodeobject.c python/branches/ssize_t/Python/ast.c python/branches/ssize_t/Python/compile.c python/branches/ssize_t/Python/pystrtod.c python/branches/ssize_t/Python/symtable.c python/branches/ssize_t/configure python/branches/ssize_t/configure.in python/branches/ssize_t/pyconfig.h.in Log: Merge with trunk:41964. Modified: python/branches/ssize_t/Include/symtable.h ============================================================================== --- python/branches/ssize_t/Include/symtable.h (original) +++ python/branches/ssize_t/Include/symtable.h Sun Jan 8 03:10:26 2006 @@ -31,13 +31,13 @@ PyObject *ste_children; /* list of child ids */ _Py_block_ty ste_type; /* module, class, or function */ int ste_unoptimized; /* false if namespace is optimized */ - int ste_nested : 1; /* true if block is nested */ - int ste_free : 1; /* true if block has free variables */ - int ste_child_free : 1; /* true if a child block has free variables, - including free refs to globals */ - int ste_generator : 1; /* true if namespace is a generator */ - int ste_varargs : 1; /* true if block has varargs */ - int ste_varkeywords : 1; /* true if block has varkeywords */ + unsigned ste_nested : 1; /* true if block is nested */ + unsigned ste_free : 1; /* true if block has free variables */ + unsigned ste_child_free : 1; /* true if a child block has free vars, + including free refs to globals */ 
+ unsigned ste_generator : 1; /* true if namespace is a generator */ + unsigned ste_varargs : 1; /* true if block has varargs */ + unsigned ste_varkeywords : 1; /* true if block has varkeywords */ int ste_lineno; /* first line of block */ int ste_opt_lineno; /* lineno of last exec or import * */ int ste_tmpname; /* counter for listcomp temp vars */ Modified: python/branches/ssize_t/Lib/test/test_compiler.py ============================================================================== --- python/branches/ssize_t/Lib/test/test_compiler.py (original) +++ python/branches/ssize_t/Lib/test/test_compiler.py Sun Jan 8 03:10:26 2006 @@ -12,6 +12,7 @@ # standard library and its test suite. This doesn't verify # that any of the code is correct, merely the compiler is able # to generate some kind of code for it. + libdir = os.path.dirname(unittest.__file__) testdir = os.path.dirname(test.test_support.__file__) @@ -35,10 +36,6 @@ def testNewClassSyntax(self): compiler.compile("class foo():pass\n\n","","exec") - - def testSyntaxErrors(self): - self.assertRaises(SyntaxError, compiler.compile, - "def foo(a=1,b):pass\n\n", "", "exec") def testLineNo(self): # Test that all nodes except Module have a correct lineno attribute. 
Modified: python/branches/ssize_t/Modules/_hashopenssl.c ============================================================================== --- python/branches/ssize_t/Modules/_hashopenssl.c (original) +++ python/branches/ssize_t/Modules/_hashopenssl.c Sun Jan 8 03:10:26 2006 @@ -18,6 +18,10 @@ #include +#ifndef HASH_OBJ_CONSTRUCTOR +#define HASH_OBJ_CONSTRUCTOR 0 +#endif + typedef struct { PyObject_HEAD PyObject *name; /* name of this hash algorithm */ @@ -33,12 +37,12 @@ static EVP_MD_CTX CONST_new_ ## Name ## _ctx; \ static EVP_MD_CTX *CONST_new_ ## Name ## _ctx_p = NULL; -DEFINE_CONSTS_FOR_NEW(md5); -DEFINE_CONSTS_FOR_NEW(sha1); -DEFINE_CONSTS_FOR_NEW(sha224); -DEFINE_CONSTS_FOR_NEW(sha256); -DEFINE_CONSTS_FOR_NEW(sha384); -DEFINE_CONSTS_FOR_NEW(sha512); +DEFINE_CONSTS_FOR_NEW(md5) +DEFINE_CONSTS_FOR_NEW(sha1) +DEFINE_CONSTS_FOR_NEW(sha224) +DEFINE_CONSTS_FOR_NEW(sha256) +DEFINE_CONSTS_FOR_NEW(sha384) +DEFINE_CONSTS_FOR_NEW(sha512) static EVPobject * @@ -101,7 +105,7 @@ EVP_MD_CTX_copy(&temp_ctx, &self->ctx); digest_size = EVP_MD_CTX_size(&temp_ctx); - EVP_DigestFinal(&temp_ctx, (char *)digest, NULL); + EVP_DigestFinal(&temp_ctx, digest, NULL); retval = PyString_FromStringAndSize((const char *)digest, digest_size); EVP_MD_CTX_cleanup(&temp_ctx); @@ -329,7 +333,7 @@ static PyObject * EVPnew(PyObject *name_obj, const EVP_MD *digest, const EVP_MD_CTX *initial_ctx, - const char *cp, unsigned int len) + const unsigned char *cp, unsigned int len) { EVPobject *self; Modified: python/branches/ssize_t/Modules/_tkinter.c ============================================================================== --- python/branches/ssize_t/Modules/_tkinter.c (original) +++ python/branches/ssize_t/Modules/_tkinter.c Sun Jan 8 03:10:26 2006 @@ -98,6 +98,16 @@ #ifdef HAVE_CREATEFILEHANDLER +/* This bit is to ensure that TCL_UNIX_FD is defined and doesn't interfere + with the proper calculation of FHANDLETYPE == TCL_UNIX_FD below. 
*/ +#ifndef TCL_UNIX_FD +# ifdef TCL_WIN_SOCKET +# define TCL_UNIX_FD (! TCL_WIN_SOCKET) +# else +# define TCL_UNIX_FD 1 +# endif +#endif + /* Tcl_CreateFileHandler() changed several times; these macros deal with the messiness. In Tcl 8.0 and later, it is not available on Windows (and on Unix, only because Jack added it back); when available on Windows, it only @@ -2619,6 +2629,7 @@ err = Tcl_Eval(Tkapp_Interp(self), "info exists tk_version"); ENTER_OVERLAP if (err == TCL_ERROR) { + /* XXX: shouldn't we do something with res? */ res = Tkinter_Error(self); } else { _tk_exists = Tkapp_Result(self); Modified: python/branches/ssize_t/Modules/expat/xmlparse.c ============================================================================== --- python/branches/ssize_t/Modules/expat/xmlparse.c (original) +++ python/branches/ssize_t/Modules/expat/xmlparse.c Sun Jan 8 03:10:26 2006 @@ -1539,7 +1539,7 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { const char *start; - enum XML_Error result = XML_STATUS_OK; + enum XML_Status result = XML_STATUS_OK; switch (parsing) { case XML_SUSPENDED: @@ -1698,7 +1698,7 @@ enum XML_Status XMLCALL XML_ResumeParser(XML_Parser parser) { - enum XML_Error result = XML_STATUS_OK; + enum XML_Status result = XML_STATUS_OK; if (parsing != XML_SUSPENDED) { errorCode = XML_ERROR_NOT_SUSPENDED; Modified: python/branches/ssize_t/Modules/linuxaudiodev.c ============================================================================== --- python/branches/ssize_t/Modules/linuxaudiodev.c (original) +++ python/branches/ssize_t/Modules/linuxaudiodev.c Sun Jan 8 03:10:26 2006 @@ -40,7 +40,7 @@ #endif typedef struct { - PyObject_HEAD; + PyObject_HEAD int x_fd; /* The open file */ int x_mode; /* file mode */ int x_icount; /* Input count */ Modified: python/branches/ssize_t/Modules/ossaudiodev.c ============================================================================== --- python/branches/ssize_t/Modules/ossaudiodev.c (original) +++ 
python/branches/ssize_t/Modules/ossaudiodev.c Sun Jan 8 03:10:26 2006 @@ -45,7 +45,7 @@ #endif typedef struct { - PyObject_HEAD; + PyObject_HEAD char *devicename; /* name of the device file */ int fd; /* file descriptor */ int mode; /* file mode (O_RDONLY, etc.) */ @@ -55,7 +55,7 @@ } oss_audio_t; typedef struct { - PyObject_HEAD; + PyObject_HEAD int fd; /* The open mixer device */ } oss_mixer_t; Modified: python/branches/ssize_t/Modules/zipimport.c ============================================================================== --- python/branches/ssize_t/Modules/zipimport.c (original) +++ python/branches/ssize_t/Modules/zipimport.c Sun Jan 8 03:10:26 2006 @@ -660,7 +660,8 @@ FILE *fp; long compress, crc, data_size, file_size, file_offset, date, time; long header_offset, name_size, header_size, header_position; - long i, l, length, count; + long i, l, count; + size_t length; char path[MAXPATHLEN + 5]; char name[MAXPATHLEN + 5]; char *p, endof_central_dir[22]; Modified: python/branches/ssize_t/Objects/floatobject.c ============================================================================== --- python/branches/ssize_t/Objects/floatobject.c (original) +++ python/branches/ssize_t/Objects/floatobject.c Sun Jan 8 03:10:26 2006 @@ -425,7 +425,6 @@ int vsign = i == 0.0 ? 0 : i < 0.0 ? 
-1 : 1; int wsign = _PyLong_Sign(w); size_t nbits; - double mant; int exponent; if (vsign != wsign) { @@ -471,7 +470,7 @@ op = _Py_SwappedOp[op]; } assert(i > 0.0); - mant = frexp(i, &exponent); + (void) frexp(i, &exponent); /* exponent is the # of bits in v before the radix point; * we know that nbits (the # of bits in w) > 48 at this point */ Modified: python/branches/ssize_t/Objects/unicodeobject.c ============================================================================== --- python/branches/ssize_t/Objects/unicodeobject.c (original) +++ python/branches/ssize_t/Objects/unicodeobject.c Sun Jan 8 03:10:26 2006 @@ -2294,7 +2294,6 @@ Py_ssize_t startinpos; Py_ssize_t endinpos; Py_ssize_t outpos; - Py_UNICODE unimax; PyUnicodeObject *v; Py_UNICODE *p; const char *end; @@ -2302,7 +2301,10 @@ PyObject *errorHandler = NULL; PyObject *exc = NULL; - unimax = PyUnicode_GetMax(); +#ifdef Py_UNICODE_WIDE + Py_UNICODE unimax = PyUnicode_GetMax(); +#endif + v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE); if (v == NULL) goto onError; Modified: python/branches/ssize_t/Python/ast.c ============================================================================== --- python/branches/ssize_t/Python/ast.c (original) +++ python/branches/ssize_t/Python/ast.c Sun Jan 8 03:10:26 2006 @@ -78,7 +78,7 @@ ast_error_finish(const char *filename) { PyObject *type, *value, *tback, *errstr, *loc, *tmp; - int lineno; + long lineno; assert(PyErr_Occurred()); if (!PyErr_ExceptionMatches(PyExc_SyntaxError)) @@ -101,7 +101,7 @@ Py_INCREF(Py_None); loc = Py_None; } - tmp = Py_BuildValue("(ziOO)", filename, lineno, Py_None, loc); + tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc); Py_DECREF(loc); if (!tmp) { Py_DECREF(errstr); @@ -252,7 +252,7 @@ if (!stmts) goto error; if (num == 1) { - stmt_ty s = ast_for_stmt(&c, n); + s = ast_for_stmt(&c, n); if (!s) goto error; asdl_seq_SET(stmts, 0, s); @@ -261,7 +261,6 @@ /* Only a simple_stmt can contain multiple statements. 
*/ REQ(n, simple_stmt); for (i = 0; i < NCH(n); i += 2) { - stmt_ty s; if (TYPE(CHILD(n, i)) == NEWLINE) break; s = ast_for_stmt(&c, CHILD(n, i)); @@ -1092,7 +1091,6 @@ return NULL; for (j = 0; j < n_ifs; j++) { - expr_ty expression; REQ(ch, gen_iter); ch = CHILD(ch, 0); REQ(ch, gen_if); @@ -1511,7 +1509,7 @@ return NULL; } - asdl_seq_SET(ops, i / 2, (void *)operator); + asdl_seq_SET(ops, i / 2, (void *)(Py_uintptr_t)operator); asdl_seq_SET(cmps, i / 2, expression); } expression = ast_for_expr(c, CHILD(n, 0)); @@ -2032,7 +2030,7 @@ return alias(NEW_IDENTIFIER(CHILD(n, 0)), NULL, c->c_arena); else { /* Create a string of the form "a.b.c" */ - int i, len; + size_t i, len; char *s; len = 0; Modified: python/branches/ssize_t/Python/compile.c ============================================================================== --- python/branches/ssize_t/Python/compile.c (original) +++ python/branches/ssize_t/Python/compile.c Sun Jan 8 03:10:26 2006 @@ -51,9 +51,9 @@ #define DEFAULT_LNOTAB_SIZE 16 struct instr { - int i_jabs : 1; - int i_jrel : 1; - int i_hasarg : 1; + unsigned i_jabs : 1; + unsigned i_jrel : 1; + unsigned i_hasarg : 1; unsigned char i_opcode; int i_oparg; struct basicblock_ *i_target; /* target block (if jump instruction) */ @@ -74,9 +74,9 @@ block reached by normal control flow. */ struct basicblock_ *b_next; /* b_seen is used to perform a DFS of basicblocks. */ - int b_seen : 1; + unsigned b_seen : 1; /* b_return is true if a RETURN_VALUE opcode is inserted. 
*/ - int b_return : 1; + unsigned b_return : 1; /* depth of stack upon entry of block, computed by stackdepth() */ int b_startdepth; /* instruction offset for block, computed by assemble_jump_offsets() */ @@ -1677,20 +1677,20 @@ } #define VISIT_SEQ(C, TYPE, SEQ) { \ - int i; \ + int _i; \ asdl_seq *seq = (SEQ); /* avoid variable capture */ \ - for (i = 0; i < asdl_seq_LEN(seq); i++) { \ - TYPE ## _ty elt = asdl_seq_GET(seq, i); \ + for (_i = 0; _i < asdl_seq_LEN(seq); _i++) { \ + TYPE ## _ty elt = asdl_seq_GET(seq, _i); \ if (!compiler_visit_ ## TYPE((C), elt)) \ return 0; \ } \ } #define VISIT_SEQ_IN_SCOPE(C, TYPE, SEQ) { \ - int i; \ + int _i; \ asdl_seq *seq = (SEQ); /* avoid variable capture */ \ - for (i = 0; i < asdl_seq_LEN(seq); i++) { \ - TYPE ## _ty elt = asdl_seq_GET(seq, i); \ + for (_i = 0; _i < asdl_seq_LEN(seq); _i++) { \ + TYPE ## _ty elt = asdl_seq_GET(seq, _i); \ if (!compiler_visit_ ## TYPE((C), elt)) { \ compiler_exit_scope(c); \ return 0; \ @@ -3863,7 +3863,7 @@ return 1; if (d_bytecode > 255) { - int i, nbytes, ncodes = d_bytecode / 255; + int j, nbytes, ncodes = d_bytecode / 255; nbytes = a->a_lnotab_off + 2 * ncodes; len = PyString_GET_SIZE(a->a_lnotab); if (nbytes >= len) { @@ -3875,7 +3875,7 @@ return 0; } lnotab = PyString_AS_STRING(a->a_lnotab) + a->a_lnotab_off; - for (i = 0; i < ncodes; i++) { + for (j = 0; j < ncodes; j++) { *lnotab++ = 255; *lnotab++ = 0; } @@ -3884,7 +3884,7 @@ } assert(d_bytecode <= 255); if (d_lineno > 255) { - int i, nbytes, ncodes = d_lineno / 255; + int j, nbytes, ncodes = d_lineno / 255; nbytes = a->a_lnotab_off + 2 * ncodes; len = PyString_GET_SIZE(a->a_lnotab); if (nbytes >= len) { @@ -3899,7 +3899,7 @@ *lnotab++ = 255; *lnotab++ = d_bytecode; d_bytecode = 0; - for (i = 1; i < ncodes; i++) { + for (j = 1; j < ncodes; j++) { *lnotab++ = 255; *lnotab++ = 0; } @@ -4194,7 +4194,7 @@ /* Emit code in reverse postorder from dfs. 
*/ for (i = a.a_nblocks - 1; i >= 0; i--) { - basicblock *b = a.a_postorder[i]; + b = a.a_postorder[i]; for (j = 0; j < b->b_iused; j++) if (!assemble_emit(&a, &b->b_instr[j])) goto error; Modified: python/branches/ssize_t/Python/pystrtod.c ============================================================================== --- python/branches/ssize_t/Python/pystrtod.c (original) +++ python/branches/ssize_t/Python/pystrtod.c Sun Jan 8 03:10:26 2006 @@ -44,7 +44,7 @@ double val = -1.0; struct lconv *locale_data; const char *decimal_point; - int decimal_point_len; + size_t decimal_point_len; const char *p, *decimal_point_pos; const char *end = NULL; /* Silence gcc */ @@ -165,9 +165,8 @@ { struct lconv *locale_data; const char *decimal_point; - int decimal_point_len; + size_t decimal_point_len, rest_len; char *p; - int rest_len; char format_char; /* g_return_val_if_fail (buffer != NULL, NULL); */ Modified: python/branches/ssize_t/Python/symtable.c ============================================================================== --- python/branches/ssize_t/Python/symtable.c (original) +++ python/branches/ssize_t/Python/symtable.c Sun Jan 8 03:10:26 2006 @@ -354,7 +354,7 @@ */ static int -analyze_name(PySTEntryObject *ste, PyObject *dict, PyObject *name, int flags, +analyze_name(PySTEntryObject *ste, PyObject *dict, PyObject *name, long flags, PyObject *bound, PyObject *local, PyObject *free, PyObject *global) { @@ -426,7 +426,7 @@ analyze_cells(PyObject *scope, PyObject *free) { PyObject *name, *v, *w; - int flags, success = 0; + int success = 0; Py_ssize_t pos = 0; w = PyInt_FromLong(CELL); @@ -434,7 +434,7 @@ return 0; while (PyDict_Next(scope, &pos, &name, &v)) { assert(PyInt_Check(v)); - flags = PyInt_AS_LONG(v); + long flags = PyInt_AS_LONG(v); if (flags != LOCAL) continue; if (!PyDict_GetItem(free, name)) @@ -507,10 +507,10 @@ PyObject *bound, PyObject *free, int class) { PyObject *name, *v, *u, *w, *free_value = NULL; - int i, flags; Py_ssize_t pos = 0; while 
(PyDict_Next(symbols, &pos, &name, &v)) { + long i, flags; assert(PyInt_Check(v)); flags = PyInt_AS_LONG(v); w = PyDict_GetItem(scope, name); @@ -541,7 +541,7 @@ */ if (class && PyInt_AS_LONG(o) & (DEF_BOUND | DEF_GLOBAL)) { - int i = PyInt_AS_LONG(o) | DEF_FREE_CLASS; + long i = PyInt_AS_LONG(o) | DEF_FREE_CLASS; o = PyInt_FromLong(i); if (!o) { Py_DECREF(free_value); @@ -583,7 +583,7 @@ { PyObject *name, *v, *local = NULL, *scope = NULL, *newbound = NULL; PyObject *newglobal = NULL, *newfree = NULL; - int i, flags, success = 0; + int i, success = 0; Py_ssize_t pos = 0; local = PyDict_New(); @@ -617,7 +617,7 @@ assert(PySTEntry_Check(ste)); assert(PyDict_Check(ste->ste_symbols)); while (PyDict_Next(ste->ste_symbols, &pos, &name, &v)) { - flags = PyInt_AS_LONG(v); + long flags = PyInt_AS_LONG(v); if (!analyze_name(ste, scope, name, flags, bound, local, free, global)) goto error; @@ -753,7 +753,7 @@ return 1; } -static int +static long symtable_lookup(struct symtable *st, PyObject *name) { PyObject *o; @@ -772,7 +772,7 @@ { PyObject *o; PyObject *dict; - int val; + long val; PyObject *mangled = _Py_Mangle(st->st_private, name); if (!mangled) @@ -1021,7 +1021,7 @@ for (i = 0; i < asdl_seq_LEN(seq); i++) { identifier name = asdl_seq_GET(seq, i); char *c_name = PyString_AS_STRING(name); - int cur = symtable_lookup(st, name); + long cur = symtable_lookup(st, name); if (cur < 0) return 0; if (cur & (DEF_LOCAL | USE)) { @@ -1173,7 +1173,7 @@ static int symtable_visit_params(struct symtable *st, asdl_seq *args, int toplevel) { - int i, complex = 0; + int i; /* go through all the toplevel arguments first */ for (i = 0; i < asdl_seq_LEN(args); i++) { @@ -1186,7 +1186,6 @@ } else if (arg->kind == Tuple_kind) { assert(arg->v.Tuple.ctx == Store); - complex = 1; if (toplevel) { if (!symtable_implicit_arg(st, i)) return 0; Modified: python/branches/ssize_t/configure ============================================================================== --- 
python/branches/ssize_t/configure (original) +++ python/branches/ssize_t/configure Sun Jan 8 03:10:26 2006 @@ -3911,6 +3911,12 @@ BASECFLAGS="$BASECFLAGS $ac_arch_flags" fi +# disable check for icc since it seems to pass, but generates a warning +if test "$CC" = icc +then + ac_cv_opt_olimit_ok=no +fi + echo "$as_me:$LINENO: checking whether $CC accepts -OPT:Olimit=0" >&5 echo $ECHO_N "checking whether $CC accepts -OPT:Olimit=0... $ECHO_C" >&6 if test "${ac_cv_opt_olimit_ok+set}" = set; then @@ -20172,6 +20178,159 @@ fi +# check where readline lives + +echo "$as_me:$LINENO: checking for readline in -lreadline" >&5 +echo $ECHO_N "checking for readline in -lreadline... $ECHO_C" >&6 +if test "${ac_cv_lib_readline_readline+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char readline (); +int +main () +{ +readline (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_readline_readline=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_readline_readline=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_readline_readline" >&5 +echo "${ECHO_T}$ac_cv_lib_readline_readline" >&6 +if test $ac_cv_lib_readline_readline = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBREADLINE 1 +_ACEOF + + LIBS="-lreadline $LIBS" + +fi + +if test "$ac_cv_have_readline_readline" = no +then + +echo "$as_me:$LINENO: checking for readline in -ltermcap" >&5 +echo $ECHO_N "checking for readline in -ltermcap... $ECHO_C" >&6 +if test "${ac_cv_lib_termcap_readline+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ltermcap $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char readline (); +int +main () +{ +readline (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_termcap_readline=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_termcap_readline=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_termcap_readline" >&5 +echo "${ECHO_T}$ac_cv_lib_termcap_readline" >&6 +if test $ac_cv_lib_termcap_readline = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBTERMCAP 1 +_ACEOF + + LIBS="-ltermcap $LIBS" + +fi + +fi + # check for readline 2.1 echo "$as_me:$LINENO: checking for rl_callback_handler_install in -lreadline" >&5 echo $ECHO_N "checking for rl_callback_handler_install in -lreadline... $ECHO_C" >&6 @@ -20179,7 +20338,7 @@ echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS -LIBS="-lreadline -ltermcap $LIBS" +LIBS="-lreadline $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -20311,7 +20470,7 @@ echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS -LIBS="-lreadline -ltermcap $LIBS" +LIBS="-lreadline $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -20385,7 +20544,7 @@ echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS -LIBS="-lreadline -ltermcap $LIBS" +LIBS="-lreadline $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF Modified: python/branches/ssize_t/configure.in ============================================================================== --- python/branches/ssize_t/configure.in (original) +++ python/branches/ssize_t/configure.in Sun Jan 8 03:10:26 2006 @@ -775,6 +775,12 @@ BASECFLAGS="$BASECFLAGS $ac_arch_flags" fi +# disable check for icc since it seems to pass, but generates a warning +if test "$CC" = icc +then + ac_cv_opt_olimit_ok=no +fi + AC_MSG_CHECKING(whether $CC accepts -OPT:Olimit=0) AC_CACHE_VAL(ac_cv_opt_olimit_ok, [ac_save_cc="$CC" @@ -2874,10 +2880,17 @@ [Define this if you have flockfile(), getc_unlocked(), and funlockfile()]) fi +# check where readline lives +AC_CHECK_LIB(readline, readline) +if test "$ac_cv_have_readline_readline" = no +then + AC_CHECK_LIB(termcap, readline) +fi + # check for readline 2.1 AC_CHECK_LIB(readline, rl_callback_handler_install, AC_DEFINE(HAVE_RL_CALLBACK, 1, - [Define if you have readline 2.1]), , -ltermcap) + [Define if you have readline 2.1]), , ) # check for readline 2.2 AC_TRY_CPP([#include <readline/readline.h>], @@ -2893,12 +2906,12 @@ # check for readline 4.0 AC_CHECK_LIB(readline, rl_pre_input_hook, AC_DEFINE(HAVE_RL_PRE_INPUT_HOOK, 1, - [Define if you have readline 4.0]), , -ltermcap) + [Define if you have readline 4.0]), , ) # check for readline 4.2 AC_CHECK_LIB(readline, rl_completion_matches, AC_DEFINE(HAVE_RL_COMPLETION_MATCHES, 1, - [Define if you have readline 4.2]), , -ltermcap) + [Define if you have readline 4.2]), , ) # also in readline 4.2 AC_TRY_CPP([#include <readline/readline.h>], Modified: python/branches/ssize_t/pyconfig.h.in ============================================================================== --- python/branches/ssize_t/pyconfig.h.in (original) +++ python/branches/ssize_t/pyconfig.h.in Sun Jan 8 03:10:26 2006 @@ -275,9 +275,15 @@ /* Define to 1 if you have the <libintl.h> header file. */ #undef HAVE_LIBINTL_H +/* Define to 1 if you have the `readline' library (-lreadline).
*/ +#undef HAVE_LIBREADLINE + /* Define to 1 if you have the `resolv' library (-lresolv). */ #undef HAVE_LIBRESOLV +/* Define to 1 if you have the `termcap' library (-ltermcap). */ +#undef HAVE_LIBTERMCAP + /* Define to 1 if you have the <libutil.h> header file. */ #undef HAVE_LIBUTIL_H From python-checkins at python.org Sun Jan 8 03:19:08 2006 From: python-checkins at python.org (tim.peters) Date: Sun, 8 Jan 2006 03:19:08 +0100 (CET) Subject: [Python-checkins] r41966 - python/trunk/Python/symtable.c Message-ID: <20060108021908.1F7701E4009@bag.python.org> Author: tim.peters Date: Sun Jan 8 03:19:07 2006 New Revision: 41966 Modified: python/trunk/Python/symtable.c Log: analyze_cells(): This no longer compiled under VC 7.1. Move declaration of local `flags` to top of block. Modified: python/trunk/Python/symtable.c ============================================================================== --- python/trunk/Python/symtable.c (original) +++ python/trunk/Python/symtable.c Sun Jan 8 03:19:07 2006 @@ -432,8 +432,9 @@ if (!w) return 0; while (PyDict_Next(scope, &pos, &name, &v)) { + long flags; assert(PyInt_Check(v)); - long flags = PyInt_AS_LONG(v); + flags = PyInt_AS_LONG(v); if (flags != LOCAL) continue; if (!PyDict_GetItem(free, name)) From python-checkins at python.org Sun Jan 8 03:20:15 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 03:20:15 +0100 (CET) Subject: [Python-checkins] r41967 - in python/branches/ssize_t: Modules/_elementtree.c Objects/structseq.c Message-ID: <20060108022015.81B851E4009@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 03:20:14 2006 New Revision: 41967 Modified: python/branches/ssize_t/Modules/_elementtree.c python/branches/ssize_t/Objects/structseq.c Log: Use proper length function return type Modified: python/branches/ssize_t/Modules/_elementtree.c ============================================================================== --- python/branches/ssize_t/Modules/_elementtree.c (original) +++
python/branches/ssize_t/Modules/_elementtree.c Sun Jan 8 03:20:14 2006 @@ -1024,7 +1024,7 @@ return PyDict_Keys(self->extra->attrib); } -static int +static Py_ssize_t element_length(ElementObject* self) { if (!self->extra) @@ -1375,7 +1375,7 @@ } static PySequenceMethods element_as_sequence = { - (inquiry) element_length, + (lenfunc) element_length, 0, /* sq_concat */ 0, /* sq_repeat */ element_getitem, Modified: python/branches/ssize_t/Objects/structseq.c ============================================================================== --- python/branches/ssize_t/Objects/structseq.c (original) +++ python/branches/ssize_t/Objects/structseq.c Sun Jan 8 03:20:14 2006 @@ -49,7 +49,7 @@ PyObject_Del(obj); } -static int +static Py_ssize_t structseq_length(PyStructSequence *obj) { return VISIBLE_SIZE(obj); @@ -284,7 +284,7 @@ } static PySequenceMethods structseq_as_sequence = { - (inquiry)structseq_length, + (lenfunc)structseq_length, (binaryfunc)structseq_concat, /* sq_concat */ (ssizeargfunc)structseq_repeat, /* sq_repeat */ (ssizeargfunc)structseq_item, /* sq_item */ From python-checkins at python.org Sun Jan 8 03:25:38 2006 From: python-checkins at python.org (tim.peters) Date: Sun, 8 Jan 2006 03:25:38 +0100 (CET) Subject: [Python-checkins] r41968 - python/trunk/Python/ast.c Message-ID: <20060108022538.5BF6B1E4009@bag.python.org> Author: tim.peters Date: Sun Jan 8 03:25:34 2006 New Revision: 41968 Modified: python/trunk/Python/ast.c Log: alias_for_import_name(): Dueling compiler warnings ;-) Squash new warnings from VC 7.1 about mixing signed and unsigned types in comparisons. I can see why `len` was changed to size_t here, but don't see why `i` was also changed. Change `i` back to int. 
Modified: python/trunk/Python/ast.c ============================================================================== --- python/trunk/Python/ast.c (original) +++ python/trunk/Python/ast.c Sun Jan 8 03:25:34 2006 @@ -2030,7 +2030,8 @@ return alias(NEW_IDENTIFIER(CHILD(n, 0)), NULL, c->c_arena); else { /* Create a string of the form "a.b.c" */ - size_t i, len; + int i; + size_t len; char *s; len = 0; From python-checkins at python.org Sun Jan 8 03:28:42 2006 From: python-checkins at python.org (tim.peters) Date: Sun, 8 Jan 2006 03:28:42 +0100 (CET) Subject: [Python-checkins] r41969 - python/trunk/Python/ast.c Message-ID: <20060108022842.61A071E4009@bag.python.org> Author: tim.peters Date: Sun Jan 8 03:28:41 2006 New Revision: 41969 Modified: python/trunk/Python/ast.c Log: Repair bizarre indentation created by VC 7.1. Modified: python/trunk/Python/ast.c ============================================================================== --- python/trunk/Python/ast.c (original) +++ python/trunk/Python/ast.c Sun Jan 8 03:28:41 2006 @@ -2030,7 +2030,7 @@ return alias(NEW_IDENTIFIER(CHILD(n, 0)), NULL, c->c_arena); else { /* Create a string of the form "a.b.c" */ - int i; + int i; size_t len; char *s; From python-checkins at python.org Sun Jan 8 04:55:58 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 8 Jan 2006 04:55:58 +0100 (CET) Subject: [Python-checkins] r41970 - python/branches/ssize_t Message-ID: <20060108035558.530B71E4002@bag.python.org> Author: martin.v.loewis Date: Sun Jan 8 04:55:57 2006 New Revision: 41970 Modified: python/branches/ssize_t/ (props changed) Log: Update py:lastmerged to 41964. 
From python-checkins at python.org Sun Jan 8 06:48:20 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 06:48:20 +0100 (CET) Subject: [Python-checkins] r41971 - in python/branches/ssize_t: Include/abstract.h Include/bufferobject.h Include/listobject.h Include/pystrtod.h Include/stringobject.h Include/tupleobject.h Objects/abstract.c Objects/bufferobject.c Objects/classobject.c Objects/codeobject.c Objects/descrobject.c Objects/dictobject.c Objects/enumobject.c Objects/fileobject.c Objects/frameobject.c Objects/funcobject.c Objects/iterobject.c Objects/listobject.c Objects/methodobject.c Objects/object.c Objects/setobject.c Objects/stringobject.c Objects/structseq.c Objects/tupleobject.c Objects/typeobject.c Objects/weakrefobject.c Python/pystrtod.c Message-ID: <20060108054820.15F321E4002@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 06:48:15 2006 New Revision: 41971 Modified: python/branches/ssize_t/Include/abstract.h python/branches/ssize_t/Include/bufferobject.h python/branches/ssize_t/Include/listobject.h python/branches/ssize_t/Include/pystrtod.h python/branches/ssize_t/Include/stringobject.h python/branches/ssize_t/Include/tupleobject.h python/branches/ssize_t/Objects/abstract.c python/branches/ssize_t/Objects/bufferobject.c python/branches/ssize_t/Objects/classobject.c python/branches/ssize_t/Objects/codeobject.c python/branches/ssize_t/Objects/descrobject.c python/branches/ssize_t/Objects/dictobject.c python/branches/ssize_t/Objects/enumobject.c python/branches/ssize_t/Objects/fileobject.c python/branches/ssize_t/Objects/frameobject.c python/branches/ssize_t/Objects/funcobject.c python/branches/ssize_t/Objects/iterobject.c python/branches/ssize_t/Objects/listobject.c python/branches/ssize_t/Objects/methodobject.c python/branches/ssize_t/Objects/object.c python/branches/ssize_t/Objects/setobject.c python/branches/ssize_t/Objects/stringobject.c python/branches/ssize_t/Objects/structseq.c 
python/branches/ssize_t/Objects/tupleobject.c python/branches/ssize_t/Objects/typeobject.c python/branches/ssize_t/Objects/weakrefobject.c python/branches/ssize_t/Python/pystrtod.c Log: Get rid of a bunch of warnings mostly by converting int -> Py_ssize_t Modified: python/branches/ssize_t/Include/abstract.h ============================================================================== --- python/branches/ssize_t/Include/abstract.h (original) +++ python/branches/ssize_t/Include/abstract.h Sun Jan 8 06:48:15 2006 @@ -422,7 +422,7 @@ PyAPI_FUNC(Py_ssize_t) PyObject_Length(PyObject *o); #define PyObject_Length PyObject_Size - PyAPI_FUNC(int) _PyObject_LengthCue(PyObject *o); + PyAPI_FUNC(Py_ssize_t) _PyObject_LengthCue(PyObject *o); /* Return the size of object o. If the object, o, provides @@ -513,7 +513,7 @@ PyAPI_FUNC(int) PyObject_AsWriteBuffer(PyObject *obj, void **buffer, - int *buffer_len); + Py_ssize_t *buffer_len); /* Takes an arbitrary object which must support the (writeable, Modified: python/branches/ssize_t/Include/bufferobject.h ============================================================================== --- python/branches/ssize_t/Include/bufferobject.h (original) +++ python/branches/ssize_t/Include/bufferobject.h Sun Jan 8 06:48:15 2006 @@ -17,15 +17,15 @@ #define Py_END_OF_BUFFER (-1) PyAPI_FUNC(PyObject *) PyBuffer_FromObject(PyObject *base, - int offset, Py_ssize_t size); + Py_ssize_t offset, Py_ssize_t size); PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteObject(PyObject *base, - int offset, - Py_ssize_t size); + Py_ssize_t offset, + Py_ssize_t size); PyAPI_FUNC(PyObject *) PyBuffer_FromMemory(void *ptr, Py_ssize_t size); PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteMemory(void *ptr, Py_ssize_t size); -PyAPI_FUNC(PyObject *) PyBuffer_New(int size); +PyAPI_FUNC(PyObject *) PyBuffer_New(Py_ssize_t size); #ifdef __cplusplus } Modified: python/branches/ssize_t/Include/listobject.h 
============================================================================== --- python/branches/ssize_t/Include/listobject.h (original) +++ python/branches/ssize_t/Include/listobject.h Sun Jan 8 06:48:15 2006 @@ -35,7 +35,7 @@ * Items must normally not be NULL, except during construction when * the list is not yet visible outside the function that builds it. */ - int allocated; + Py_ssize_t allocated; } PyListObject; PyAPI_DATA(PyTypeObject) PyList_Type; Modified: python/branches/ssize_t/Include/pystrtod.h ============================================================================== --- python/branches/ssize_t/Include/pystrtod.h (original) +++ python/branches/ssize_t/Include/pystrtod.h Sun Jan 8 06:48:15 2006 @@ -8,7 +8,7 @@ PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr); PyAPI_FUNC(double) PyOS_ascii_atof(const char *str); -PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, int buf_len, const char *format, double d); +PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d); #ifdef __cplusplus Modified: python/branches/ssize_t/Include/stringobject.h ============================================================================== --- python/branches/ssize_t/Include/stringobject.h (original) +++ python/branches/ssize_t/Include/stringobject.h Sun Jan 8 06:48:15 2006 @@ -64,7 +64,7 @@ Py_GCC_ATTRIBUTE((format(printf, 1, 0))); PyAPI_FUNC(PyObject *) PyString_FromFormat(const char*, ...) 
Py_GCC_ATTRIBUTE((format(printf, 1, 2))); -PyAPI_FUNC(int) PyString_Size(PyObject *); +PyAPI_FUNC(Py_ssize_t) PyString_Size(PyObject *); PyAPI_FUNC(char *) PyString_AsString(PyObject *); PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int); PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *); Modified: python/branches/ssize_t/Include/tupleobject.h ============================================================================== --- python/branches/ssize_t/Include/tupleobject.h (original) +++ python/branches/ssize_t/Include/tupleobject.h Sun Jan 8 06:48:15 2006 @@ -41,8 +41,8 @@ PyAPI_FUNC(PyObject *) PyTuple_GetItem(PyObject *, Py_ssize_t); PyAPI_FUNC(int) PyTuple_SetItem(PyObject *, Py_ssize_t, PyObject *); PyAPI_FUNC(PyObject *) PyTuple_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t); -PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, int); -PyAPI_FUNC(PyObject *) PyTuple_Pack(int, ...); +PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t); +PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...); /* Macro, trading safety for speed */ #define PyTuple_GET_ITEM(op, i) (((PyTupleObject *)(op))->ob_item[i]) Modified: python/branches/ssize_t/Objects/abstract.c ============================================================================== --- python/branches/ssize_t/Objects/abstract.c (original) +++ python/branches/ssize_t/Objects/abstract.c Sun Jan 8 06:48:15 2006 @@ -81,7 +81,7 @@ } #define PyObject_Length PyObject_Size -int +Py_ssize_t _PyObject_LengthCue(PyObject *o) { Py_ssize_t rv = PyObject_Size(o); @@ -94,7 +94,7 @@ PyErr_Fetch(&err_type, &err_value, &err_tb); ro = PyObject_CallMethod(o, "_length_cue", NULL); if (ro != NULL) { - rv = (int)PyInt_AsLong(ro); + rv = PyInt_AsLong(ro); Py_DECREF(ro); Py_XDECREF(err_type); Py_XDECREF(err_value); @@ -297,11 +297,11 @@ int PyObject_AsWriteBuffer(PyObject *obj, void **buffer, - int *buffer_len) + Py_ssize_t *buffer_len) { PyBufferProcs *pb; void*pp; - int len; + Py_ssize_t len; if (obj == NULL || buffer == NULL || 
buffer_len == NULL) { null_error(); @@ -415,7 +415,7 @@ binaryfunc slot; slot = NB_BINOP(mv, op_slot); if (slot) { - PyObject *x = slot(v, w); + x = slot(v, w); Py_DECREF(v); Py_DECREF(w); return x; @@ -1260,7 +1260,7 @@ } static PyObject * -sliceobj_from_intint(int i, int j) +sliceobj_from_intint(Py_ssize_t i, Py_ssize_t j) { PyObject *start, *end, *slice; start = PyInt_FromLong((long)i); @@ -1438,9 +1438,9 @@ PySequence_Tuple(PyObject *v) { PyObject *it; /* iter(v) */ - int n; /* guess for result tuple size */ + Py_ssize_t n; /* guess for result tuple size */ PyObject *result; - int j; + Py_ssize_t j; if (v == NULL) return null_error(); @@ -1486,7 +1486,7 @@ break; } if (j >= n) { - int oldn = n; + Py_ssize_t oldn = n; /* The over-allocation strategy can grow a bit faster than for lists because unlike lists the over-allocation isn't permanent -- we reclaim @@ -2053,7 +2053,7 @@ abstract_issubclass(PyObject *derived, PyObject *cls) { PyObject *bases; - int i, n; + Py_ssize_t i, n; int r = 0; @@ -2137,7 +2137,7 @@ } } else if (PyTuple_Check(cls)) { - int i, n; + Py_ssize_t i, n; if (!recursion_depth) { PyErr_SetString(PyExc_RuntimeError, @@ -2191,8 +2191,8 @@ return -1; if (PyTuple_Check(cls)) { - int i; - int n = PyTuple_GET_SIZE(cls); + Py_ssize_t i; + Py_ssize_t n = PyTuple_GET_SIZE(cls); if (!recursion_depth) { PyErr_SetString(PyExc_RuntimeError, Modified: python/branches/ssize_t/Objects/bufferobject.c ============================================================================== --- python/branches/ssize_t/Objects/bufferobject.c (original) +++ python/branches/ssize_t/Objects/bufferobject.c Sun Jan 8 06:48:15 2006 @@ -8,15 +8,15 @@ PyObject_HEAD PyObject *b_base; void *b_ptr; - int b_size; - int b_offset; + Py_ssize_t b_size; + Py_ssize_t b_offset; int b_readonly; long b_hash; } PyBufferObject; static int -get_buf(PyBufferObject *self, void **ptr, int *size) +get_buf(PyBufferObject *self, void **ptr, Py_ssize_t *size) { if (self->b_base == NULL) { assert (ptr 
!= NULL); @@ -24,7 +24,7 @@ *size = self->b_size; } else { - int count, offset; + Py_ssize_t count, offset; getreadbufferproc proc; PyBufferProcs *bp = self->b_base->ob_type->tp_as_buffer; if ((*bp->bf_getsegcount)(self->b_base, NULL) != 1) { @@ -56,7 +56,7 @@ static PyObject * -buffer_from_memory(PyObject *base, Py_ssize_t size, int offset, void *ptr, +buffer_from_memory(PyObject *base, Py_ssize_t size, Py_ssize_t offset, void *ptr, int readonly) { PyBufferObject * b; @@ -88,7 +88,7 @@ } static PyObject * -buffer_from_object(PyObject *base, int size, int offset, int readonly) +buffer_from_object(PyObject *base, Py_ssize_t size, Py_ssize_t offset, int readonly) { if (offset < 0) { PyErr_SetString(PyExc_ValueError, @@ -99,7 +99,7 @@ /* another buffer, refer to the base object */ PyBufferObject *b = (PyBufferObject *)base; if (b->b_size != Py_END_OF_BUFFER) { - int base_size = b->b_size - offset; + Py_ssize_t base_size = b->b_size - offset; if (base_size < 0) base_size = 0; if (size == Py_END_OF_BUFFER || size > base_size) @@ -113,7 +113,7 @@ PyObject * -PyBuffer_FromObject(PyObject *base, int offset, Py_ssize_t size) +PyBuffer_FromObject(PyObject *base, Py_ssize_t offset, Py_ssize_t size) { PyBufferProcs *pb = base->ob_type->tp_as_buffer; @@ -129,7 +129,7 @@ } PyObject * -PyBuffer_FromReadWriteObject(PyObject *base, int offset, Py_ssize_t size) +PyBuffer_FromReadWriteObject(PyObject *base, Py_ssize_t offset, Py_ssize_t size) { PyBufferProcs *pb = base->ob_type->tp_as_buffer; @@ -157,7 +157,7 @@ } PyObject * -PyBuffer_New(int size) +PyBuffer_New(Py_ssize_t size) { PyObject *o; PyBufferObject * b; @@ -167,6 +167,7 @@ "size must be zero or positive"); return NULL; } + /* XXX: check for overflow in multiply */ /* Inline PyObject_New */ o = PyObject_MALLOC(sizeof(*b) + size); if ( o == NULL ) @@ -189,13 +190,13 @@ buffer_new(PyTypeObject *type, PyObject *args, PyObject *kw) { PyObject *ob; - int offset = 0; - int size = Py_END_OF_BUFFER; + Py_ssize_t offset = 0; + 
Py_ssize_t size = Py_END_OF_BUFFER; if (!_PyArg_NoKeywords("buffer()", kw)) return NULL; - if (!PyArg_ParseTuple(args, "O|ii:buffer", &ob, &offset, &size)) + if (!PyArg_ParseTuple(args, "O|ll:buffer", &ob, &offset, &size)) return NULL; return PyBuffer_FromObject(ob, offset, size); } @@ -220,7 +221,8 @@ buffer_compare(PyBufferObject *self, PyBufferObject *other) { void *p1, *p2; - int len_self, len_other, min_len, cmp; + Py_ssize_t len_self, len_other, min_len; + int cmp; if (!get_buf(self, &p1, &len_self)) return -1; @@ -238,17 +240,17 @@ static PyObject * buffer_repr(PyBufferObject *self) { - char *status = self->b_readonly ? "read-only" : "read-write"; + const char *status = self->b_readonly ? "read-only" : "read-write"; if ( self->b_base == NULL ) - return PyString_FromFormat("<%s buffer ptr %p, size %d at %p>", + return PyString_FromFormat("<%s buffer ptr %p, size %ld at %p>", status, self->b_ptr, self->b_size, self); else return PyString_FromFormat( - "<%s buffer for %p, size %d, offset %d at %p>", + "<%s buffer for %p, size %ld, offset %ld at %p>", status, self->b_base, self->b_size, @@ -260,8 +262,8 @@ buffer_hash(PyBufferObject *self) { void *ptr; - int size; - register int len; + Py_ssize_t size; + register Py_ssize_t len; register unsigned char *p; register long x; @@ -300,7 +302,7 @@ buffer_str(PyBufferObject *self) { void *ptr; - int size; + Py_ssize_t size; if (!get_buf(self, &ptr, &size)) return NULL; return PyString_FromStringAndSize(ptr, size); @@ -312,7 +314,7 @@ buffer_length(PyBufferObject *self) { void *ptr; - int size; + Py_ssize_t size; if (!get_buf(self, &ptr, &size)) return -1; return size; @@ -325,7 +327,7 @@ void *ptr1, *ptr2; char *p; PyObject *ob; - int size, count; + Py_ssize_t size, count; if ( pb == NULL || pb->bf_getreadbuffer == NULL || @@ -374,7 +376,7 @@ PyObject *ob; register char *p; void *ptr; - int size; + Py_ssize_t size; if ( count < 0 ) count = 0; @@ -401,7 +403,7 @@ buffer_item(PyBufferObject *self, Py_ssize_t idx) { void 
*ptr; - int size; + Py_ssize_t size; if (!get_buf(self, &ptr, &size)) return NULL; if ( idx < 0 || idx >= size ) { @@ -415,7 +417,7 @@ buffer_slice(PyBufferObject *self, Py_ssize_t left, Py_ssize_t right) { void *ptr; - int size; + Py_ssize_t size; if (!get_buf(self, &ptr, &size)) return NULL; if ( left < 0 ) @@ -435,8 +437,8 @@ { PyBufferProcs *pb; void *ptr1, *ptr2; - int size; - int count; + Py_ssize_t size; + Py_ssize_t count; if ( self->b_readonly ) { PyErr_SetString(PyExc_TypeError, @@ -486,9 +488,9 @@ { PyBufferProcs *pb; void *ptr1, *ptr2; - int size; - int slice_len; - int count; + Py_ssize_t size; + Py_ssize_t slice_len; + Py_ssize_t count; if ( self->b_readonly ) { PyErr_SetString(PyExc_TypeError, @@ -541,10 +543,10 @@ /* Buffer methods */ -static int +static Py_ssize_t buffer_getreadbuf(PyBufferObject *self, int idx, void **pp) { - int size; + Py_ssize_t size; if ( idx != 0 ) { PyErr_SetString(PyExc_SystemError, "accessing non-existent buffer segment"); @@ -555,7 +557,7 @@ return size; } -static int +static Py_ssize_t buffer_getwritebuf(PyBufferObject *self, int idx, void **pp) { if ( self->b_readonly ) @@ -567,10 +569,10 @@ } static int -buffer_getsegcount(PyBufferObject *self, int *lenp) +buffer_getsegcount(PyBufferObject *self, Py_ssize_t *lenp) { void *ptr; - int size; + Py_ssize_t size; if (!get_buf(self, &ptr, &size)) return -1; if (lenp) @@ -578,11 +580,11 @@ return 1; } -static int -buffer_getcharbuf(PyBufferObject *self, int idx, const char **pp) +static Py_ssize_t +buffer_getcharbuf(PyBufferObject *self, Py_ssize_t idx, const char **pp) { void *ptr; - int size; + Py_ssize_t size; if ( idx != 0 ) { PyErr_SetString(PyExc_SystemError, "accessing non-existent buffer segment"); Modified: python/branches/ssize_t/Objects/classobject.c ============================================================================== --- python/branches/ssize_t/Objects/classobject.c (original) +++ python/branches/ssize_t/Objects/classobject.c Sun Jan 8 06:48:15 2006 @@ 
-68,7 +68,7 @@ return NULL; } else { - int i, n; + Py_ssize_t i, n; PyObject *base; if (!PyTuple_Check(bases)) { PyErr_SetString(PyExc_TypeError, @@ -185,7 +185,7 @@ static PyObject * class_lookup(PyClassObject *cp, PyObject *name, PyClassObject **pclass) { - int i, n; + Py_ssize_t i, n; PyObject *value = PyDict_GetItem(cp->cl_dict, name); if (value != NULL) { *pclass = cp; @@ -281,7 +281,7 @@ static char * set_bases(PyClassObject *c, PyObject *v) { - int i, n; + Py_ssize_t i, n; if (v == NULL || !PyTuple_Check(v)) return "__bases__ must be a tuple object"; @@ -483,7 +483,7 @@ int PyClass_IsSubclass(PyObject *class, PyObject *base) { - int i, n; + Py_ssize_t i, n; PyClassObject *cp; if (class == base) return 1; @@ -1128,7 +1128,7 @@ } static PyObject * -sliceobj_from_intint(int i, int j) +sliceobj_from_intint(Py_ssize_t i, Py_ssize_t j) { PyObject *start, *end, *res; @@ -2434,7 +2434,7 @@ Py_INCREF(arg); } else { - int argcount = PyTuple_Size(arg); + Py_ssize_t argcount = PyTuple_Size(arg); PyObject *newarg = PyTuple_New(argcount + 1); int i; if (newarg == NULL) Modified: python/branches/ssize_t/Objects/codeobject.c ============================================================================== --- python/branches/ssize_t/Objects/codeobject.c (original) +++ python/branches/ssize_t/Objects/codeobject.c Sun Jan 8 06:48:15 2006 @@ -28,7 +28,7 @@ static void intern_strings(PyObject *tuple) { - int i; + Py_ssize_t i; for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); @@ -48,7 +48,7 @@ PyObject *lnotab) { PyCodeObject *co; - int i; + Py_ssize_t i; /* Check argument types */ if (argcount < 0 || nlocals < 0 || code == NULL || @@ -135,7 +135,7 @@ { PyObject *newtuple; PyObject *item; - int i, len; + Py_ssize_t i, len; len = PyTuple_GET_SIZE(tup); newtuple = PyTuple_New(len); Modified: python/branches/ssize_t/Objects/descrobject.c ============================================================================== --- 
python/branches/ssize_t/Objects/descrobject.c (original) +++ python/branches/ssize_t/Objects/descrobject.c Sun Jan 8 06:48:15 2006 @@ -209,7 +209,7 @@ static PyObject * methoddescr_call(PyMethodDescrObject *descr, PyObject *args, PyObject *kwds) { - int argc; + Py_ssize_t argc; PyObject *self, *func, *result; /* Make sure that the first argument is acceptable as 'self' */ @@ -267,7 +267,7 @@ static PyObject * wrapperdescr_call(PyWrapperDescrObject *descr, PyObject *args, PyObject *kwds) { - int argc; + Py_ssize_t argc; PyObject *self, *func, *result; /* Make sure that the first argument is acceptable as 'self' */ Modified: python/branches/ssize_t/Objects/dictobject.c ============================================================================== --- python/branches/ssize_t/Objects/dictobject.c (original) +++ python/branches/ssize_t/Objects/dictobject.c Sun Jan 8 06:48:15 2006 @@ -217,8 +217,8 @@ static dictentry * lookdict(dictobject *mp, PyObject *key, register long hash) { - register int i; - register unsigned int perturb; + register Py_ssize_t i; + register size_t perturb; register dictentry *freeslot; register unsigned int mask = mp->ma_mask; dictentry *ep0 = mp->ma_table; @@ -328,8 +328,8 @@ static dictentry * lookdict_string(dictobject *mp, PyObject *key, register long hash) { - register int i; - register unsigned int perturb; + register Py_ssize_t i; + register size_t perturb; register dictentry *freeslot; register unsigned int mask = mp->ma_mask; dictentry *ep0 = mp->ma_table; @@ -692,7 +692,8 @@ int PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) { - register int i, mask; + register Py_ssize_t i; + register int mask; register dictentry *ep; if (!PyDict_Check(op)) @@ -1109,7 +1110,7 @@ PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override) { PyObject *it; /* iter(seq2) */ - int i; /* index into seq2 of current element */ + int i; /* index into seq2 of current element */ PyObject *item; /* seq2[i] */ PyObject *fast; /* 
item as a 2-tuple or 2-list */ @@ -1123,7 +1124,7 @@ for (i = 0; ; ++i) { PyObject *key, *value; - int n; + Py_ssize_t n; fast = NULL; item = PyIter_Next(it); @@ -1147,7 +1148,7 @@ if (n != 2) { PyErr_Format(PyExc_ValueError, "dictionary update sequence element #%d " - "has length %d; 2 is required", + "has length %ld; 2 is required", i, n); goto Fail; } @@ -2058,7 +2059,7 @@ static PyObject * dictiter_len(dictiterobject *di) { - int len = 0; + long len = 0; if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used) len = di->len; return PyInt_FromLong(len); Modified: python/branches/ssize_t/Objects/enumobject.c ============================================================================== --- python/branches/ssize_t/Objects/enumobject.c (original) +++ python/branches/ssize_t/Objects/enumobject.c Sun Jan 8 06:48:15 2006 @@ -241,7 +241,7 @@ static PyObject * reversed_len(reversedobject *ro) { - int position, seqsize; + Py_ssize_t position, seqsize; if (ro->seq == NULL) return PyInt_FromLong(0); Modified: python/branches/ssize_t/Objects/fileobject.c ============================================================================== --- python/branches/ssize_t/Objects/fileobject.c (original) +++ python/branches/ssize_t/Objects/fileobject.c Sun Jan 8 06:48:15 2006 @@ -137,7 +137,7 @@ static int check_the_mode(char *mode) { - unsigned int len = strlen(mode); + size_t len = strlen(mode); switch (len) { case 0: @@ -1247,7 +1247,7 @@ if (n < 0 && result != NULL && PyString_Check(result)) { char *s = PyString_AS_STRING(result); - int len = PyString_GET_SIZE(result); + Py_ssize_t len = PyString_GET_SIZE(result); if (len == 0) { Py_DECREF(result); result = NULL; @@ -1268,7 +1268,7 @@ #ifdef Py_USING_UNICODE if (n < 0 && result != NULL && PyUnicode_Check(result)) { Py_UNICODE *s = PyUnicode_AS_UNICODE(result); - int len = PyUnicode_GET_SIZE(result); + Py_ssize_t len = PyUnicode_GET_SIZE(result); if (len == 0) { Py_DECREF(result); result = NULL; @@ -1460,8 +1460,8 @@ PyObject 
*list, *line; PyObject *it; /* iter(seq) */ PyObject *result; - int i, j, index, len, islist; - Py_ssize_t nwritten; + int index, islist; + Py_ssize_t i, j, nwritten, len; assert(seq != NULL); if (f->f_fp == NULL) @@ -1519,7 +1519,6 @@ PyObject *v = PyList_GET_ITEM(list, i); if (!PyString_Check(v)) { const char *buffer; - Py_ssize_t len; if (((f->f_binary && PyObject_AsReadBuffer(v, (const void**)&buffer, @@ -2029,7 +2028,7 @@ int PyFile_SoftSpace(PyObject *f, int newflag) { - int oldflag = 0; + long oldflag = 0; if (f == NULL) { /* Do nothing */ } @@ -2045,6 +2044,7 @@ else { if (PyInt_Check(v)) oldflag = PyInt_AsLong(v); + assert(oldflag < INT_MAX); Py_DECREF(v); } v = PyInt_FromLong((long)newflag); @@ -2056,7 +2056,7 @@ Py_DECREF(v); } } - return oldflag; + return (int)oldflag; } /* Interfaces to write objects/strings to file-like objects */ Modified: python/branches/ssize_t/Objects/frameobject.c ============================================================================== --- python/branches/ssize_t/Objects/frameobject.c (original) +++ python/branches/ssize_t/Objects/frameobject.c Sun Jan 8 06:48:15 2006 @@ -540,7 +540,7 @@ PyFrameObject *back = tstate->frame; PyFrameObject *f; PyObject *builtins; - int extras, ncells, nfrees, i; + Py_ssize_t extras, ncells, nfrees, i; #ifdef Py_DEBUG if (code == NULL || globals == NULL || !PyDict_Check(globals) || @@ -678,10 +678,10 @@ /* Convert between "fast" version of locals and dictionary version */ static void -map_to_dict(PyObject *map, int nmap, PyObject *dict, PyObject **values, - int deref) +map_to_dict(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values, + Py_ssize_t deref) { - int j; + Py_ssize_t j; for (j = nmap; --j >= 0; ) { PyObject *key = PyTuple_GET_ITEM(map, j); PyObject *value = values[j]; @@ -699,10 +699,10 @@ } static void -dict_to_map(PyObject *map, int nmap, PyObject *dict, PyObject **values, - int deref, int clear) +dict_to_map(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject 
**values, + Py_ssize_t deref, int clear) { - int j; + Py_ssize_t j; for (j = nmap; --j >= 0; ) { PyObject *key = PyTuple_GET_ITEM(map, j); PyObject *value = PyObject_GetItem(dict, key); @@ -733,7 +733,7 @@ PyObject *locals, *map; PyObject **fast; PyObject *error_type, *error_value, *error_traceback; - int j; + Py_ssize_t j; if (f == NULL) return; locals = f->f_locals; @@ -776,7 +776,7 @@ PyObject *locals, *map; PyObject **fast; PyObject *error_type, *error_value, *error_traceback; - int j; + Py_ssize_t j; if (f == NULL) return; locals = f->f_locals; Modified: python/branches/ssize_t/Objects/funcobject.c ============================================================================== --- python/branches/ssize_t/Objects/funcobject.c (original) +++ python/branches/ssize_t/Objects/funcobject.c Sun Jan 8 06:48:15 2006 @@ -232,7 +232,7 @@ func_set_code(PyFunctionObject *op, PyObject *value) { PyObject *tmp; - int nfree, nclosure; + Py_ssize_t nfree, nclosure; if (restricted()) return -1; @@ -363,7 +363,7 @@ PyObject *defaults = Py_None; PyObject *closure = Py_None; PyFunctionObject *newfunc; - int nfree, nclosure; + Py_ssize_t nfree, nclosure; static const char *kwlist[] = {"code", "globals", "name", "argdefs", "closure", 0}; @@ -405,7 +405,7 @@ PyString_AS_STRING(code->co_name), nfree, nclosure); if (nclosure) { - int i; + Py_ssize_t i; for (i = 0; i < nclosure; i++) { PyObject *o = PyTuple_GET_ITEM(closure, i); if (!PyCell_Check(o)) { @@ -516,7 +516,7 @@ PyObject *result; PyObject *argdefs; PyObject **d, **k; - int nk, nd; + Py_ssize_t nk, nd; argdefs = PyFunction_GET_DEFAULTS(func); if (argdefs != NULL && PyTuple_Check(argdefs)) { Modified: python/branches/ssize_t/Objects/iterobject.c ============================================================================== --- python/branches/ssize_t/Objects/iterobject.c (original) +++ python/branches/ssize_t/Objects/iterobject.c Sun Jan 8 06:48:15 2006 @@ -74,7 +74,7 @@ static PyObject * iter_len(seqiterobject *it) { - int 
seqsize, len; + Py_ssize_t seqsize, len; if (it->it_seq) { seqsize = PySequence_Size(it->it_seq); Modified: python/branches/ssize_t/Objects/listobject.c ============================================================================== --- python/branches/ssize_t/Objects/listobject.c (original) +++ python/branches/ssize_t/Objects/listobject.c Sun Jan 8 06:48:15 2006 @@ -273,12 +273,13 @@ static int list_print(PyListObject *op, FILE *fp, int flags) { + int rc; Py_ssize_t i; - i = Py_ReprEnter((PyObject*)op); - if (i != 0) { - if (i < 0) - return i; + rc = Py_ReprEnter((PyObject*)op); + if (rc != 0) { + if (rc < 0) + return rc; fprintf(fp, "[...]"); return 0; } @@ -1051,7 +1052,7 @@ Returns -1 in case of error. */ -static int +static Py_ssize_t count_run(PyObject **lo, PyObject **hi, PyObject *compare, int *descending) { Py_ssize_t k; @@ -1387,14 +1388,15 @@ * merge, and should have na <= nb. See listsort.txt for more info. * Return 0 if successful, -1 if error. */ -static int -merge_lo(MergeState *ms, PyObject **pa, int na, PyObject **pb, int nb) +static Py_ssize_t +merge_lo(MergeState *ms, PyObject **pa, Py_ssize_t na, + PyObject **pb, Py_ssize_t nb) { Py_ssize_t k; PyObject *compare; PyObject **dest; int result = -1; /* guilty until proved innocent */ - int min_gallop = ms->min_gallop; + Py_ssize_t min_gallop = ms->min_gallop; assert(ms && pa && pb && na > 0 && nb > 0 && pa + na == pb); if (MERGE_GETMEM(ms, na) < 0) @@ -1518,7 +1520,7 @@ * merge, and should have na >= nb. See listsort.txt for more info. * Return 0 if successful, -1 if error. */ -static int +static Py_ssize_t merge_hi(MergeState *ms, PyObject **pa, Py_ssize_t na, PyObject **pb, Py_ssize_t nb) { Py_ssize_t k; @@ -1655,7 +1657,7 @@ /* Merge the two runs at stack indices i and i+1. * Returns 0 on success, -1 on error. 
*/ -static int +static Py_ssize_t merge_at(MergeState *ms, Py_ssize_t i) { PyObject **pa, **pb; @@ -1806,7 +1808,7 @@ static PyTypeObject sortwrapper_type; static PyObject * -sortwrapper_richcompare(sortwrapperobject *a, sortwrapperobject *b, Py_ssize_t op) +sortwrapper_richcompare(sortwrapperobject *a, sortwrapperobject *b, int op) { if (!PyObject_TypeCheck(b, &sortwrapper_type)) { PyErr_SetString(PyExc_TypeError, @@ -2268,13 +2270,13 @@ static int list_traverse(PyListObject *o, visitproc visit, void *arg) { - Py_ssize_t i, err; + Py_ssize_t i; PyObject *x; for (i = o->ob_size; --i >= 0; ) { x = o->ob_item[i]; if (x != NULL) { - err = visit(x, arg); + int err = visit(x, arg); if (err) return err; } @@ -2598,8 +2600,8 @@ if (PySequence_Fast_GET_SIZE(seq) != slicelength) { /* XXX can we use %zd here? */ PyErr_Format(PyExc_ValueError, - "attempt to assign sequence of size %d to extended slice of size %ld", - (int)PySequence_Fast_GET_SIZE(seq), + "attempt to assign sequence of size %ld to extended slice of size %ld", + PySequence_Fast_GET_SIZE(seq), slicelength); Py_DECREF(seq); return -1; Modified: python/branches/ssize_t/Objects/methodobject.c ============================================================================== --- python/branches/ssize_t/Objects/methodobject.c (original) +++ python/branches/ssize_t/Objects/methodobject.c Sun Jan 8 06:48:15 2006 @@ -65,7 +65,7 @@ PyCFunctionObject* f = (PyCFunctionObject*)func; PyCFunction meth = PyCFunction_GET_FUNCTION(func); PyObject *self = PyCFunction_GET_SELF(func); - int size; + long size; switch (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST)) { case METH_VARARGS: @@ -81,7 +81,7 @@ if (size == 0) return (*meth)(self, NULL); PyErr_Format(PyExc_TypeError, - "%.200s() takes no arguments (%d given)", + "%.200s() takes no arguments (%ld given)", f->m_ml->ml_name, size); return NULL; } @@ -92,7 +92,7 @@ if (size == 1) return (*meth)(self, PyTuple_GET_ITEM(arg, 0)); PyErr_Format(PyExc_TypeError, 
- "%.200s() takes exactly one argument (%d given)", + "%.200s() takes exactly one argument (%ld given)", f->m_ml->ml_name, size); return NULL; } Modified: python/branches/ssize_t/Objects/object.c ============================================================================== --- python/branches/ssize_t/Objects/object.c (original) +++ python/branches/ssize_t/Objects/object.c Sun Jan 8 06:48:15 2006 @@ -1175,7 +1175,7 @@ if (dictoffset == 0) return NULL; if (dictoffset < 0) { - int tsize; + Py_ssize_t tsize; size_t size; tsize = ((PyVarObject *)obj)->ob_size; @@ -1237,7 +1237,7 @@ /* Inline _PyType_Lookup */ { - int i, n; + Py_ssize_t i, n; PyObject *mro, *base, *dict; /* Look in tp_dict of types in MRO */ @@ -1278,7 +1278,7 @@ if (dictoffset != 0) { PyObject *dict; if (dictoffset < 0) { - int tsize; + Py_ssize_t tsize; size_t size; tsize = ((PyVarObject *)obj)->ob_size; @@ -1432,7 +1432,7 @@ res = (*v->ob_type->tp_as_sequence->sq_length)(v); else return 1; - return (res > 0) ? 1 : res; + return (res > 0) ? 
1 : (int)res; } /* equivalent of 'not v' @@ -1556,7 +1556,7 @@ PyErr_Clear(); else { /* We have no guarantee that bases is a real tuple */ - int i, n; + Py_ssize_t i, n; n = PySequence_Size(bases); /* This better be right */ if (n < 0) PyErr_Clear(); @@ -1992,7 +1992,7 @@ { PyObject *dict; PyObject *list; - int i; + Py_ssize_t i; dict = PyThreadState_GetDict(); if (dict == NULL) @@ -2020,7 +2020,7 @@ { PyObject *dict; PyObject *list; - int i; + Py_ssize_t i; dict = PyThreadState_GetDict(); if (dict == NULL) Modified: python/branches/ssize_t/Objects/setobject.c ============================================================================== --- python/branches/ssize_t/Objects/setobject.c (original) +++ python/branches/ssize_t/Objects/setobject.c Sun Jan 8 06:48:15 2006 @@ -51,8 +51,8 @@ static setentry * set_lookkey(PySetObject *so, PyObject *key, register long hash) { - register int i; - register unsigned int perturb; + register Py_ssize_t i; + register size_t perturb; register setentry *freeslot; register unsigned int mask = so->mask; setentry *table = so->table; @@ -129,8 +129,8 @@ static setentry * set_lookkey_string(PySetObject *so, PyObject *key, register long hash) { - register int i; - register unsigned int perturb; + register Py_ssize_t i; + register size_t perturb; register setentry *freeslot; register unsigned int mask = so->mask; setentry *table = so->table; @@ -753,7 +753,7 @@ static PyObject * setiter_len(setiterobject *si) { - int len = 0; + long len = 0; if (si->si_set != NULL && si->si_used == si->si_set->used) len = si->len; return PyInt_FromLong(len); @@ -847,7 +847,7 @@ return set_merge(so, other); if (PyDict_Check(other)) { - PyObject *key, *value; + PyObject *value; Py_ssize_t pos = 0; while (PyDict_Next(other, &pos, &key, &value)) { if (set_add_key(so, key) == -1) Modified: python/branches/ssize_t/Objects/stringobject.c ============================================================================== --- 
python/branches/ssize_t/Objects/stringobject.c (original) +++ python/branches/ssize_t/Objects/stringobject.c Sun Jan 8 06:48:15 2006 @@ -666,7 +666,7 @@ return NULL; } -static int +static Py_ssize_t string_getsize(register PyObject *op) { char *s; @@ -686,7 +686,7 @@ return s; } -int +Py_ssize_t PyString_Size(register PyObject *op) { if (!PyString_Check(op)) @@ -1216,7 +1216,7 @@ } } -static int +static Py_ssize_t string_buffer_getreadbuf(PyStringObject *self, /*XXX*/int index, const void **ptr) { if ( index != 0 ) { @@ -1237,15 +1237,15 @@ } static int -string_buffer_getsegcount(PyStringObject *self, int *lenp) +string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp) { if ( lenp ) *lenp = self->ob_size; return 1; } -static int -string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr) +static Py_ssize_t +string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr) { if ( index != 0 ) { PyErr_SetString(PyExc_SystemError, @@ -2894,11 +2894,11 @@ static PyObject * string_center(PyStringObject *self, PyObject *args) { - int marg, left; - int width; + Py_ssize_t marg, left; + long width; char fillchar = ' '; - if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar)) + if (!PyArg_ParseTuple(args, "l|c:center", &width, &fillchar)) return NULL; if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { @@ -2921,12 +2921,12 @@ static PyObject * string_zfill(PyStringObject *self, PyObject *args) { - int fill; + long fill; PyObject *s; char *p; int width; - if (!PyArg_ParseTuple(args, "i:zfill", &width)) + if (!PyArg_ParseTuple(args, "l:zfill", &width)) return NULL; if (PyString_GET_SIZE(self) >= width) { @@ -3228,7 +3228,7 @@ goto onError; for (i = j = 0; i < len; ) { - int eol; + Py_ssize_t eol; /* Find a line and append it */ while (i < len && data[i] != '\n' && data[i] != '\r') @@ -4394,7 +4394,7 @@ { PyObject *keys; PyStringObject *s; - int i, n; + Py_ssize_t i, n; if (interned == NULL || 
!PyDict_Check(interned)) return; Modified: python/branches/ssize_t/Objects/structseq.c ============================================================================== --- python/branches/ssize_t/Objects/structseq.c (original) +++ python/branches/ssize_t/Objects/structseq.c Sun Jan 8 06:48:15 2006 @@ -40,7 +40,7 @@ static void structseq_dealloc(PyStructSequence *obj) { - int i, size; + Py_ssize_t i, size; size = REAL_SIZE(obj); for (i = 0; i < size; ++i) { @@ -70,7 +70,7 @@ structseq_slice(PyStructSequence *obj, Py_ssize_t low, Py_ssize_t high) { PyTupleObject *np; - int i; + Py_ssize_t i; if (low < 0) low = 0; @@ -96,7 +96,7 @@ PyObject *dict = NULL; PyObject *ob; PyStructSequence *res = NULL; - int len, min_len, max_len, i, n_unnamed_fields; + Py_ssize_t len, min_len, max_len, i, n_unnamed_fields; static const char *kwlist[] = {"sequence", "dict", 0}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:structseq", @@ -125,7 +125,7 @@ if (min_len != max_len) { if (len < min_len) { PyErr_Format(PyExc_TypeError, - "%.500s() takes an at least %d-sequence (%d-sequence given)", + "%.500s() takes an at least %ld-sequence (%ld-sequence given)", type->tp_name, min_len, len); Py_DECREF(arg); return NULL; @@ -133,7 +133,7 @@ if (len > max_len) { PyErr_Format(PyExc_TypeError, - "%.500s() takes an at most %d-sequence (%d-sequence given)", + "%.500s() takes an at most %ld-sequence (%ld-sequence given)", type->tp_name, max_len, len); Py_DECREF(arg); return NULL; @@ -142,7 +142,7 @@ else { if (len != min_len) { PyErr_Format(PyExc_TypeError, - "%.500s() takes a %d-sequence (%d-sequence given)", + "%.500s() takes a %ld-sequence (%ld-sequence given)", type->tp_name, min_len, len); Py_DECREF(arg); return NULL; Modified: python/branches/ssize_t/Objects/tupleobject.c ============================================================================== --- python/branches/ssize_t/Objects/tupleobject.c (original) +++ python/branches/ssize_t/Objects/tupleobject.c Sun Jan 8 06:48:15 2006 @@ -27,7 
+27,7 @@ PyTuple_New(register Py_ssize_t size) { register PyTupleObject *op; - int i; + Py_ssize_t i; if (size < 0) { PyErr_BadInternalCall(); return NULL; @@ -57,7 +57,7 @@ else #endif { - int nbytes = size * sizeof(PyObject *); + Py_ssize_t nbytes = size * sizeof(PyObject *); /* Check for overflow */ if (nbytes / sizeof(PyObject *) != (size_t)size || (nbytes += sizeof(PyTupleObject) - sizeof(PyObject *)) @@ -131,9 +131,9 @@ } PyObject * -PyTuple_Pack(int n, ...) +PyTuple_Pack(Py_ssize_t n, ...) { - int i; + Py_ssize_t i; PyObject *o; PyObject *result; PyObject **items; @@ -159,8 +159,8 @@ static void tupledealloc(register PyTupleObject *op) { - register int i; - register int len = op->ob_size; + register Py_ssize_t i; + register Py_ssize_t len = op->ob_size; PyObject_GC_UnTrack(op); Py_TRASHCAN_SAFE_BEGIN(op) if (len > 0) { @@ -187,7 +187,7 @@ static int tupleprint(PyTupleObject *op, FILE *fp, int flags) { - int i; + Py_ssize_t i; fprintf(fp, "("); for (i = 0; i < op->ob_size; i++) { if (i > 0) @@ -204,7 +204,7 @@ static PyObject * tuplerepr(PyTupleObject *v) { - int i, n; + Py_ssize_t i, n; PyObject *s, *temp; PyObject *pieces, *result = NULL; @@ -268,7 +268,7 @@ tuplehash(PyTupleObject *v) { register long x, y; - register int len = v->ob_size; + register Py_ssize_t len = v->ob_size; register PyObject **p; long mult = 1000003L; x = 0x345678L; @@ -295,7 +295,8 @@ static int tuplecontains(PyTupleObject *a, PyObject *el) { - int i, cmp; + Py_ssize_t i; + int cmp; for (i = 0, cmp = 0 ; cmp == 0 && i < a->ob_size; ++i) cmp = PyObject_RichCompareBool(el, PyTuple_GET_ITEM(a, i), @@ -320,8 +321,8 @@ { register PyTupleObject *np; PyObject **src, **dest; - register int i; - int len; + register Py_ssize_t i; + Py_ssize_t len; if (ilow < 0) ilow = 0; if (ihigh > a->ob_size) @@ -359,8 +360,8 @@ static PyObject * tupleconcat(register PyTupleObject *a, register PyObject *bb) { - register int size; - register int i; + register Py_ssize_t size; + register Py_ssize_t i; PyObject 
**src, **dest; PyTupleObject *np; if (!PyTuple_Check(bb)) { @@ -398,8 +399,8 @@ static PyObject * tuplerepeat(PyTupleObject *a, Py_ssize_t n) { - int i, j; - int size; + Py_ssize_t i, j; + Py_ssize_t size; PyTupleObject *np; PyObject **p, **items; if (n < 0) @@ -435,13 +436,13 @@ static int tupletraverse(PyTupleObject *o, visitproc visit, void *arg) { - int i, err; + Py_ssize_t i; PyObject *x; for (i = o->ob_size; --i >= 0; ) { x = o->ob_item[i]; if (x != NULL) { - err = visit(x, arg); + int err = visit(x, arg); if (err) return err; } @@ -453,8 +454,8 @@ tuplerichcompare(PyObject *v, PyObject *w, int op) { PyTupleObject *vt, *wt; - int i; - int vlen, wlen; + Py_ssize_t i; + Py_ssize_t vlen, wlen; if (!PyTuple_Check(v) || !PyTuple_Check(w)) { Py_INCREF(Py_NotImplemented); @@ -546,7 +547,7 @@ tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *tmp, *new, *item; - int i, n; + Py_ssize_t i, n; assert(PyType_IsSubtype(type, &PyTuple_Type)); tmp = tuple_new(&PyTuple_Type, args, kwds); @@ -702,12 +703,12 @@ known to some other part of the code. 
*/ int -_PyTuple_Resize(PyObject **pv, int newsize) +_PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) { register PyTupleObject *v; register PyTupleObject *sv; - int i; - int oldsize; + Py_ssize_t i; + Py_ssize_t oldsize; v = (PyTupleObject *) *pv; if (v == NULL || v->ob_type != &PyTuple_Type || @@ -849,7 +850,7 @@ static PyObject * tupleiter_len(tupleiterobject *it) { - int len = 0; + long len = 0; if (it->it_seq) len = PyTuple_GET_SIZE(it->it_seq) - it->it_index; return PyInt_FromLong(len); Modified: python/branches/ssize_t/Objects/typeobject.c ============================================================================== --- python/branches/ssize_t/Objects/typeobject.c (original) +++ python/branches/ssize_t/Objects/typeobject.c Sun Jan 8 06:48:15 2006 @@ -145,7 +145,7 @@ { PyTypeObject *subclass; PyObject *ref, *subclasses, *old_mro; - int i, n; + Py_ssize_t i, n; subclasses = type->tp_subclasses; if (subclasses == NULL) @@ -184,7 +184,8 @@ static int type_set_bases(PyTypeObject *type, PyObject *value, void *context) { - int i, r = 0; + Py_ssize_t i; + int r = 0; PyObject *ob, *temp; PyTypeObject *new_base, *old_base; PyObject *old_bases, *old_mro; @@ -483,7 +484,7 @@ static int traverse_slots(PyTypeObject *type, PyObject *self, visitproc visit, void *arg) { - int i, n; + Py_ssize_t i, n; PyMemberDef *mp; n = type->ob_size; @@ -548,7 +549,7 @@ static void clear_slots(PyTypeObject *type, PyObject *self) { - int i, n; + Py_ssize_t i, n; PyMemberDef *mp; n = type->ob_size; @@ -825,7 +826,7 @@ if (mro != NULL) { /* Deal with multiple inheritance without recursion by walking the MRO tuple */ - int i, n; + Py_ssize_t i, n; assert(PyTuple_Check(mro)); n = PyTuple_GET_SIZE(mro); for (i = 0; i < n; i++) { @@ -970,7 +971,7 @@ fill_classic_mro(PyObject *mro, PyObject *cls) { PyObject *bases, *base; - int i, n; + Py_ssize_t i, n; assert(PyList_Check(mro)); assert(PyClass_Check(cls)); @@ -1037,7 +1038,7 @@ static int tail_contains(PyObject *list, int whence, PyObject *o) { 
- int j, size; + Py_ssize_t j, size; size = PyList_GET_SIZE(list); for (j = whence+1; j < size; j++) { @@ -1068,7 +1069,7 @@ static int check_duplicates(PyObject *list) { - int i, j, n; + Py_ssize_t i, j, n; /* Let's use a quadratic time algorithm, assuming that the bases lists is short. */ @@ -1136,9 +1137,9 @@ static int pmerge(PyObject *acc, PyObject* to_merge) { - int i, j, to_merge_size; + Py_ssize_t i, j, to_merge_size, empty_cnt; int *remain; - int ok, empty_cnt; + int ok; to_merge_size = PyList_GET_SIZE(to_merge); @@ -1206,7 +1207,8 @@ static PyObject * mro_implementation(PyTypeObject *type) { - int i, n, ok; + Py_ssize_t i, n; + int ok; PyObject *bases, *result; PyObject *to_merge, *bases_aslist; @@ -1309,7 +1311,7 @@ if (tuple == NULL) return -1; if (checkit) { - int i, len; + Py_ssize_t i, len; PyObject *cls; PyTypeObject *solid; @@ -1350,7 +1352,7 @@ static PyTypeObject * best_base(PyObject *bases) { - int i, n; + Py_ssize_t i, n; PyTypeObject *base, *winner, *candidate, *base_i; PyObject *base_proto; @@ -1532,7 +1534,7 @@ valid_identifier(PyObject *s) { unsigned char *p; - int i, n; + Py_ssize_t i, n; if (!PyString_Check(s)) { PyErr_SetString(PyExc_TypeError, @@ -1596,7 +1598,7 @@ PyTypeObject *type, *base, *tmptype, *winner; PyHeapTypeObject *et; PyMemberDef *mp; - int i, nbases, nslots, slotoffset, add_dict, add_weak; + Py_ssize_t i, nbases, nslots, slotoffset, add_dict, add_weak; int j, may_add_dict, may_add_weak; assert(args != NULL && PyTuple_Check(args)); @@ -1604,8 +1606,8 @@ /* Special case: type(x) should return x->ob_type */ { - const int nargs = PyTuple_GET_SIZE(args); - const int nkwds = kwds == NULL ? 0 : PyDict_Size(kwds); + const Py_ssize_t nargs = PyTuple_GET_SIZE(args); + const Py_ssize_t nkwds = kwds == NULL ? 
0 : PyDict_Size(kwds); if (PyType_CheckExact(metatype) && nargs == 1 && nkwds == 0) { PyObject *x = PyTuple_GET_ITEM(args, 0); @@ -1999,7 +2001,7 @@ PyObject * _PyType_Lookup(PyTypeObject *type, PyObject *name) { - int i, n; + Py_ssize_t i, n; PyObject *mro, *res, *base, *dict; /* Look in tp_dict of types in MRO */ @@ -2154,7 +2156,7 @@ type_subclasses(PyTypeObject *type, PyObject *args_ignored) { PyObject *list, *raw, *ref; - int i, n; + Py_ssize_t i, n; list = PyList_New(0); if (list == NULL) @@ -2587,7 +2589,7 @@ PyObject *getstate = NULL, *state = NULL, *names = NULL; PyObject *slots = NULL, *listitems = NULL, *dictitems = NULL; PyObject *copy_reg = NULL, *newobj = NULL, *res = NULL; - int i, n; + Py_ssize_t i, n; cls = PyObject_GetAttrString(obj, "__class__"); if (cls == NULL) @@ -3155,7 +3157,7 @@ { PyObject *dict, *bases; PyTypeObject *base; - int i, n; + Py_ssize_t i, n; if (type->tp_flags & Py_TPFLAGS_READY) { assert(type->tp_dict != NULL); @@ -3340,7 +3342,7 @@ static void remove_subclass(PyTypeObject *base, PyTypeObject *type) { - int i; + Py_ssize_t i; PyObject *list, *ref; list = base->tp_subclasses; @@ -3547,7 +3549,7 @@ if (i < 0) { PySequenceMethods *sq = self->ob_type->tp_as_sequence; if (sq && sq->sq_length) { - int n = (*sq->sq_length)(self); + Py_ssize_t n = (*sq->sq_length)(self); if (n < 0) return -1; i += n; @@ -3561,7 +3563,7 @@ { ssizeargfunc func = (ssizeargfunc)wrapped; PyObject *arg; - int i; + Py_ssize_t i; if (PyTuple_GET_SIZE(args) == 1) { arg = PyTuple_GET_ITEM(args, 0); @@ -4433,7 +4435,7 @@ { PyObject *func, *args, *res; static PyObject *cmp_str; - int c; + Py_ssize_t c; func = lookup_method(self, "__cmp__", &cmp_str); if (func == NULL) { @@ -4811,7 +4813,7 @@ static PyObject *new_str; PyObject *func; PyObject *newargs, *x; - int i, n; + Py_ssize_t i, n; if (new_str == NULL) { new_str = PyString_InternFromString("__new__"); @@ -5157,9 +5159,10 @@ proper indirection pointer (as_buffer, etc.); it returns NULL if the indirection 
pointer is NULL. */ static void ** -slotptr(PyTypeObject *type, int offset) +slotptr(PyTypeObject *type, int ioffset) { char *ptr; + long offset = ioffset; /* Note: this depends on the order of the members of PyHeapTypeObject! */ assert(offset >= 0); @@ -5424,7 +5427,7 @@ { PyTypeObject *subclass; PyObject *ref, *subclasses, *dict; - int i, n; + Py_ssize_t i, n; subclasses = type->tp_subclasses; if (subclasses == NULL) @@ -5577,7 +5580,7 @@ PyObject *mro, *res, *tmp, *dict; PyTypeObject *starttype; descrgetfunc f; - int i, n; + Py_ssize_t i, n; starttype = su->obj_type; mro = starttype->tp_mro; Modified: python/branches/ssize_t/Objects/weakrefobject.c ============================================================================== --- python/branches/ssize_t/Objects/weakrefobject.c (original) +++ python/branches/ssize_t/Objects/weakrefobject.c Sun Jan 8 06:48:15 2006 @@ -886,7 +886,7 @@ } if (*list != NULL) { PyWeakReference *current = *list; - int count = _PyWeakref_GetWeakrefCount(current); + Py_ssize_t count = _PyWeakref_GetWeakrefCount(current); int restore_error = PyErr_Occurred() ? 
1 : 0; PyObject *err_type, *err_value, *err_tb; @@ -904,7 +904,7 @@ } else { PyObject *tuple = PyTuple_New(count * 2); - int i = 0; + Py_ssize_t i = 0; for (i = 0; i < count; ++i) { PyWeakReference *next = current->wr_next; @@ -920,8 +920,8 @@ PyObject *callback = PyTuple_GET_ITEM(tuple, i * 2 + 1); if (callback != NULL) { - PyObject *current = PyTuple_GET_ITEM(tuple, i * 2); - handle_callback((PyWeakReference *)current, callback); + PyObject *item = PyTuple_GET_ITEM(tuple, i * 2); + handle_callback((PyWeakReference *)item, callback); } } Py_DECREF(tuple); Modified: python/branches/ssize_t/Python/pystrtod.c ============================================================================== --- python/branches/ssize_t/Python/pystrtod.c (original) +++ python/branches/ssize_t/Python/pystrtod.c Sun Jan 8 06:48:15 2006 @@ -159,7 +159,7 @@ **/ char * PyOS_ascii_formatd(char *buffer, - int buf_len, + size_t buf_len, const char *format, double d) { From python-checkins at python.org Sun Jan 8 07:09:15 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 07:09:15 +0100 (CET) Subject: [Python-checkins] r41972 - python/branches/ssize_t/Objects/funcobject.c Message-ID: <20060108060915.A69E01E4002@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 07:09:09 2006 New Revision: 41972 Modified: python/branches/ssize_t/Objects/funcobject.c Log: Fix some warnings I introduced by increasing size Modified: python/branches/ssize_t/Objects/funcobject.c ============================================================================== --- python/branches/ssize_t/Objects/funcobject.c (original) +++ python/branches/ssize_t/Objects/funcobject.c Sun Jan 8 07:09:09 2006 @@ -248,8 +248,8 @@ PyTuple_GET_SIZE(op->func_closure)); if (nclosure != nfree) { PyErr_Format(PyExc_ValueError, - "%s() requires a code object with %d free vars," - " not %d", + "%s() requires a code object with %ld free vars," + " not %ld", PyString_AsString(op->func_name), nclosure, nfree); return -1; @@ 
-401,7 +401,7 @@ nclosure = closure == Py_None ? 0 : PyTuple_GET_SIZE(closure); if (nfree != nclosure) return PyErr_Format(PyExc_ValueError, - "%s requires closure of length %d, not %d", + "%s requires closure of length %ld, not %ld", PyString_AS_STRING(code->co_name), nfree, nclosure); if (nclosure) { From python-checkins at python.org Sun Jan 8 07:13:14 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 07:13:14 +0100 (CET) Subject: [Python-checkins] r41973 - python/trunk/Objects/fileobject.c Message-ID: <20060108061314.C3ED11E4002@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 07:13:13 2006 New Revision: 41973 Modified: python/trunk/Objects/fileobject.c Log: strlen() returns a size_t, get rid of 64-bit warning Modified: python/trunk/Objects/fileobject.c ============================================================================== --- python/trunk/Objects/fileobject.c (original) +++ python/trunk/Objects/fileobject.c Sun Jan 8 07:13:13 2006 @@ -136,7 +136,7 @@ static int check_the_mode(char *mode) { - unsigned int len = strlen(mode); + size_t len = strlen(mode); switch (len) { case 0: From python-checkins at python.org Sun Jan 8 07:13:45 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 8 Jan 2006 07:13:45 +0100 (CET) Subject: [Python-checkins] r41974 - python/trunk/Objects/abstract.c python/trunk/Objects/setobject.c python/trunk/Objects/weakrefobject.c Message-ID: <20060108061345.12C751E4002@bag.python.org> Author: neal.norwitz Date: Sun Jan 8 07:13:44 2006 New Revision: 41974 Modified: python/trunk/Objects/abstract.c python/trunk/Objects/setobject.c python/trunk/Objects/weakrefobject.c Log: Remove some shadowed variables Modified: python/trunk/Objects/abstract.c ============================================================================== --- python/trunk/Objects/abstract.c (original) +++ python/trunk/Objects/abstract.c Sun Jan 8 07:13:44 2006 @@ -415,7 +415,7 @@ binaryfunc slot; slot = NB_BINOP(mv, op_slot); 
if (slot) { - PyObject *x = slot(v, w); + x = slot(v, w); Py_DECREF(v); Py_DECREF(w); return x; Modified: python/trunk/Objects/setobject.c ============================================================================== --- python/trunk/Objects/setobject.c (original) +++ python/trunk/Objects/setobject.c Sun Jan 8 07:13:44 2006 @@ -846,7 +846,7 @@ return set_merge(so, other); if (PyDict_Check(other)) { - PyObject *key, *value; + PyObject *value; int pos = 0; while (PyDict_Next(other, &pos, &key, &value)) { if (set_add_key(so, key) == -1) Modified: python/trunk/Objects/weakrefobject.c ============================================================================== --- python/trunk/Objects/weakrefobject.c (original) +++ python/trunk/Objects/weakrefobject.c Sun Jan 8 07:13:44 2006 @@ -920,8 +920,8 @@ PyObject *callback = PyTuple_GET_ITEM(tuple, i * 2 + 1); if (callback != NULL) { - PyObject *current = PyTuple_GET_ITEM(tuple, i * 2); - handle_callback((PyWeakReference *)current, callback); + PyObject *item = PyTuple_GET_ITEM(tuple, i * 2); + handle_callback((PyWeakReference *)item, callback); } } Py_DECREF(tuple); From martin at v.loewis.de Sun Jan 8 10:46:52 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Sun, 08 Jan 2006 10:46:52 +0100 Subject: [Python-checkins] r41972 - python/branches/ssize_t/Objects/funcobject.c In-Reply-To: <20060108060915.A69E01E4002@bag.python.org> References: <20060108060915.A69E01E4002@bag.python.org> Message-ID: <43C0DF8C.7050104@v.loewis.de> neal.norwitz wrote: > PyErr_Format(PyExc_ValueError, > - "%s() requires a code object with %d free vars," > - " not %d", > + "%s() requires a code object with %ld free vars," > + " not %ld", > PyString_AsString(op->func_name), > nclosure, nfree); I'm not sure whether this is the right fix. 
This says nclosure and nfree are longs; this is true on most 32-bit systems (where ssize_t is int and int and long have the same width), and on some 64-bit systems (where ssize_t and long are the same). It does not work for Win64 (where ssize_t is larger than long). I'm tempted to use %zd in these places, but fear that it isn't portable enough (it *is* part of C99). Regards, Martin From python-checkins at python.org Sun Jan 8 11:07:35 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 8 Jan 2006 11:07:35 +0100 (CET) Subject: [Python-checkins] r41975 - in python/trunk: Misc/NEWS configure configure.in Message-ID: <20060108100735.4EC751E4002@bag.python.org> Author: martin.v.loewis Date: Sun Jan 8 11:07:33 2006 New Revision: 41975 Modified: python/trunk/Misc/NEWS python/trunk/configure python/trunk/configure.in Log: Patch #881820: look for openpty and forkpty also in libbsd. Will backport. Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Sun Jan 8 11:07:33 2006 @@ -633,6 +633,8 @@ Build ----- +- Patch #881820: look for openpty and forkpty also in libbsd. + - The sources of zlib are now part of the Python distribution (zlib 1.2.3). The zlib module is now builtin on Windows. Modified: python/trunk/configure ============================================================================== --- python/trunk/configure (original) +++ python/trunk/configure Sun Jan 8 11:07:33 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41925 . +# From configure.in Revision: 41953 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.5. # @@ -14741,7 +14741,81 @@ #define HAVE_OPENPTY 1 _ACEOF LIBS="$LIBS -lutil" +else + echo "$as_me:$LINENO: checking for openpty in -lbsd" >&5 +echo $ECHO_N "checking for openpty in -lbsd... 
$ECHO_C" >&6 +if test "${ac_cv_lib_bsd_openpty+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lbsd $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char openpty (); +int +main () +{ +openpty (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_bsd_openpty=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_bsd_openpty=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS fi +echo "$as_me:$LINENO: result: $ac_cv_lib_bsd_openpty" >&5 +echo "${ECHO_T}$ac_cv_lib_bsd_openpty" >&6 +if test $ac_cv_lib_bsd_openpty = yes; then + cat >>confdefs.h <<\_ACEOF +#define HAVE_OPENPTY 1 +_ACEOF + LIBS="$LIBS -lbsd" +fi + + +fi + fi done @@ -14909,7 +14983,81 @@ #define HAVE_FORKPTY 1 _ACEOF LIBS="$LIBS -lutil" +else + echo "$as_me:$LINENO: checking for forkpty in -lbsd" >&5 +echo $ECHO_N "checking for forkpty in -lbsd... $ECHO_C" >&6 +if test "${ac_cv_lib_bsd_forkpty+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lbsd $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char forkpty (); +int +main () +{ +forkpty (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_bsd_forkpty=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_bsd_forkpty=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS fi +echo "$as_me:$LINENO: result: $ac_cv_lib_bsd_forkpty" >&5 +echo "${ECHO_T}$ac_cv_lib_bsd_forkpty" >&6 +if test $ac_cv_lib_bsd_forkpty = yes; then + cat >>confdefs.h <<\_ACEOF +#define HAVE_FORKPTY 1 +_ACEOF + LIBS="$LIBS -lbsd" +fi + + +fi + fi done Modified: python/trunk/configure.in ============================================================================== --- python/trunk/configure.in (original) +++ python/trunk/configure.in Sun Jan 8 11:07:33 2006 @@ -2273,8 +2273,18 @@ # check for openpty and forkpty -AC_CHECK_FUNCS(openpty,, AC_CHECK_LIB(util,openpty, [AC_DEFINE(HAVE_OPENPTY) LIBS="$LIBS -lutil"])) -AC_CHECK_FUNCS(forkpty,, AC_CHECK_LIB(util,forkpty, [AC_DEFINE(HAVE_FORKPTY) LIBS="$LIBS -lutil"])) +AC_CHECK_FUNCS(openpty,, + AC_CHECK_LIB(util,openpty, + [AC_DEFINE(HAVE_OPENPTY) LIBS="$LIBS -lutil"], + AC_CHECK_LIB(bsd,openpty, [AC_DEFINE(HAVE_OPENPTY) LIBS="$LIBS -lbsd"]) + ) +) +AC_CHECK_FUNCS(forkpty,, + AC_CHECK_LIB(util,forkpty, + [AC_DEFINE(HAVE_FORKPTY) LIBS="$LIBS -lutil"], + AC_CHECK_LIB(bsd,forkpty, [AC_DEFINE(HAVE_FORKPTY) LIBS="$LIBS -lbsd"]) + ) +) # check for long file support functions AC_CHECK_FUNCS(fseek64 fseeko fstatvfs ftell64 ftello statvfs) From python-checkins at python.org Sun Jan 8 11:07:59 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 8 Jan 2006 11:07:59 +0100 (CET) Subject: [Python-checkins] r41976 - in python/branches/release24-maint: Misc/NEWS configure configure.in Message-ID: 
<20060108100759.62BEC1E4002@bag.python.org> Author: martin.v.loewis Date: Sun Jan 8 11:07:57 2006 New Revision: 41976 Modified: python/branches/release24-maint/Misc/NEWS python/branches/release24-maint/configure python/branches/release24-maint/configure.in Log: Patch #881820: look for openpty and forkpty also in libbsd. Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Sun Jan 8 11:07:57 2006 @@ -115,6 +115,8 @@ Build ----- +- Patch #881820: look for openpty and forkpty also in libbsd. + - Use -xcode=pic32 for CCSHARED on Solaris with SunPro. - The Windows .msi files are now compressed using lzx:21. This produces a Modified: python/branches/release24-maint/configure ============================================================================== --- python/branches/release24-maint/configure (original) +++ python/branches/release24-maint/configure Sun Jan 8 11:07:57 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41546 . +# From configure.in Revision: 41853 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.4. # @@ -14689,7 +14689,81 @@ #define HAVE_OPENPTY 1 _ACEOF LIBS="$LIBS -lutil" +else + echo "$as_me:$LINENO: checking for openpty in -lbsd" >&5 +echo $ECHO_N "checking for openpty in -lbsd... $ECHO_C" >&6 +if test "${ac_cv_lib_bsd_openpty+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lbsd $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. 
*/ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char openpty (); +int +main () +{ +openpty (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_bsd_openpty=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_bsd_openpty=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS fi +echo "$as_me:$LINENO: result: $ac_cv_lib_bsd_openpty" >&5 +echo "${ECHO_T}$ac_cv_lib_bsd_openpty" >&6 +if test $ac_cv_lib_bsd_openpty = yes; then + cat >>confdefs.h <<\_ACEOF +#define HAVE_OPENPTY 1 +_ACEOF + LIBS="$LIBS -lbsd" +fi + + +fi + fi done @@ -14857,7 +14931,81 @@ #define HAVE_FORKPTY 1 _ACEOF LIBS="$LIBS -lutil" +else + echo "$as_me:$LINENO: checking for forkpty in -lbsd" >&5 +echo $ECHO_N "checking for forkpty in -lbsd... $ECHO_C" >&6 +if test "${ac_cv_lib_bsd_forkpty+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lbsd $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char forkpty (); +int +main () +{ +forkpty (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_bsd_forkpty=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_bsd_forkpty=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS fi +echo "$as_me:$LINENO: result: $ac_cv_lib_bsd_forkpty" >&5 +echo "${ECHO_T}$ac_cv_lib_bsd_forkpty" >&6 +if test $ac_cv_lib_bsd_forkpty = yes; then + cat >>confdefs.h <<\_ACEOF +#define HAVE_FORKPTY 1 +_ACEOF + LIBS="$LIBS -lbsd" +fi + + +fi + fi done Modified: python/branches/release24-maint/configure.in ============================================================================== --- python/branches/release24-maint/configure.in (original) +++ python/branches/release24-maint/configure.in Sun Jan 8 11:07:57 2006 @@ -2258,8 +2258,18 @@ # check for openpty and forkpty -AC_CHECK_FUNCS(openpty,, AC_CHECK_LIB(util,openpty, [AC_DEFINE(HAVE_OPENPTY) LIBS="$LIBS -lutil"])) -AC_CHECK_FUNCS(forkpty,, AC_CHECK_LIB(util,forkpty, [AC_DEFINE(HAVE_FORKPTY) LIBS="$LIBS -lutil"])) +AC_CHECK_FUNCS(openpty,, + AC_CHECK_LIB(util,openpty, + [AC_DEFINE(HAVE_OPENPTY) LIBS="$LIBS -lutil"], + AC_CHECK_LIB(bsd,openpty, [AC_DEFINE(HAVE_OPENPTY) LIBS="$LIBS -lbsd"]) + ) +) +AC_CHECK_FUNCS(forkpty,, + AC_CHECK_LIB(util,forkpty, + [AC_DEFINE(HAVE_FORKPTY) LIBS="$LIBS -lutil"], + AC_CHECK_LIB(bsd,forkpty, [AC_DEFINE(HAVE_FORKPTY) LIBS="$LIBS -lbsd"]) + ) +) # check for long file support functions AC_CHECK_FUNCS(fseek64 fseeko fstatvfs ftell64 ftello statvfs) From python-checkins at python.org Sun Jan 8 11:45:41 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 8 Jan 2006 11:45:41 +0100 (CET) Subject: [Python-checkins] r41977 - in python/trunk: Doc/lib/libcodecs.tex Lib/encodings/utf_8_sig.py Lib/test/test_codecs.py Misc/NEWS Message-ID: <20060108104541.1A66A1E4002@bag.python.org> Author: martin.v.loewis Date: Sun Jan 8 11:45:39 2006 New Revision: 41977 Added: 
python/trunk/Lib/encodings/utf_8_sig.py Modified: python/trunk/Doc/lib/libcodecs.tex python/trunk/Lib/test/test_codecs.py python/trunk/Misc/NEWS Log: Patch #1177307: UTF-8-Sig codec. Modified: python/trunk/Doc/lib/libcodecs.tex ============================================================================== --- python/trunk/Doc/lib/libcodecs.tex (original) +++ python/trunk/Doc/lib/libcodecs.tex Sun Jan 8 11:45:39 2006 @@ -522,6 +522,113 @@ \class{StreamReader} and \class{StreamWriter} classes. They inherit all other methods and attribute from the underlying stream. +\subsection{Encodings and Unicode\label{encodings-overview}} + +Unicode strings are stored internally as sequences of codepoints (to +be precise as Py_UNICODE arrays). Depending on the way Python is +compiled (either via --enable-unicode=ucs2 or --enable-unicode=ucs4, +with the former being the default) Py_UNICODE is either a 16-bit or +32-bit data type. Once a Unicode object is used outside of CPU and +memory, CPU endianness and how these arrays are stored as bytes become +an issue. Transforming a unicode object into a sequence of bytes is +called encoding and recreating the unicode object from the sequence of +bytes is known as decoding. There are many different methods how this +transformation can be done (these methods are also called encodings). +The simplest method is to map the codepoints 0-255 to the bytes +0x0-0xff. This means that a unicode object that contains codepoints +above U+00FF can't be encoded with this method (which is called +'latin-1' or 'iso-8859-1'). unicode.encode() will raise a +UnicodeEncodeError that looks like this: UnicodeEncodeError: 'latin-1' +codec can't encode character u'\u1234' in position 3: ordinal not in +range(256) + +There's another group of encodings (the so called charmap encodings) +that choose a different subset of all unicode code points and how +these codepoints are mapped to the bytes 0x0-0xff. To see how this is +done simply open e.g. 
encodings/cp1252.py (which is an encoding that +is used primarily on Windows). There's string constant with 256 +characters that shows you which character is mapped to which byte +value. + +All of these encodings can only encode 256 of the 65536 (or 1114111) +codepoints defined in unicode. A simple and straightforward way that +can store each Unicode code point, is to store each codepoint as two +consecutive bytes. There are two possibilities: Store the bytes in big +endian or in little endian order. These two encodings are called +UTF-16-BE and UTF-16-LE respectively. Their disadvantage is that if +e.g. you use UTF-16-BE on a little endian machine you will always have +to swap bytes on encoding and decoding. UTF-16 avoids this problem: +Bytes will always be in natural endianness. When these bytes are read +by a CPU with a different endianness, then bytes have to be swapped +though. To be able to detect the endianness of a UTF-16 byte sequence, +there's the so called BOM (the "Byte Order Mark"). This is the Unicode +character U+FEFF. This character will be prepended to every UTF-16 +byte sequence. The byte swapped version of this character (0xFFFE) is +an illegal character that may not appear in a Unicode text. So when +the first character in an UTF-16 byte sequence appears to be a U+FFFE +the bytes have to be swapped on decoding. Unfortunately upto Unicode +4.0 the character U+FEFF had a second purpose as a "ZERO WIDTH +NO-BREAK SPACE": A character that has no width and doesn't allow a +word to be split. It can e.g. be used to give hints to a ligature +algorithm. With Unicode 4.0 using U+FEFF as a ZERO WIDTH NO-BREAK +SPACE has been deprecated (with U+2060 (WORD JOINER) assuming this +role). 
Nevertheless Unicode software still must be able to handle +U+FEFF in both roles: As a BOM it's a device to determine the storage +layout of the encoded bytes, and vanishes once the byte sequence has +been decoded into a Unicode string; as a ZERO WIDTH NO-BREAK SPACE +it's a normal character that will be decoded like any other. + +There's another encoding that is able to encode the full range of +Unicode characters: UTF-8. UTF-8 is an 8bit encoding, which means +there are no issues with byte order in UTF-8. Each byte in a UTF-8 +byte sequence consists of two parts: Marker bits (the most significant +bits) and payload bits. The marker bits are a sequence of zero to six +1 bits followed by a 0 bit. Unicode characters are encoded like this +(with x being a payload bit, which when concatenated give the Unicode +character): + +\begin{tableii}{l|l}{textrm}{}{Range}{Encoding} +\lineii{U-00000000 ... U-0000007F}{0xxxxxxx} +\lineii{U-00000080 ... U-000007FF}{110xxxxx 10xxxxxx} +\lineii{U-00000800 ... U-0000FFFF}{1110xxxx 10xxxxxx 10xxxxxx} +\lineii{U-00010000 ... U-001FFFFF}{11110xxx 10xxxxxx 10xxxxxx 10xxxxxx} +\lineii{U-00200000 ... U-03FFFFFF}{111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx} +\lineii{U-04000000 ... U-7FFFFFFF}{1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx} +\end{tableii} + +The least significant bit of the Unicode character is the rightmost x +bit. + +As UTF-8 is an 8bit encoding no BOM is required and any U+FEFF +character in the decoded Unicode string (even if it's the first +character) is treated as a ZERO WIDTH NO-BREAK SPACE. + +Without external information it's impossible to reliably determine +which encoding was used for encoding a Unicode string. Each charmap +encoding can decode any random byte sequence. However that's not +possible with UTF-8, as UTF-8 byte sequences have a structure that +doesn't allow arbitrary byte sequences.
To increase the reliability +with which a UTF-8 encoding can be detected, Microsoft invented a +variant of UTF-8 (that Python 2.5 calls "utf-8-sig") for its Notepad +program: Before any of the Unicode characters is written to the file, +a UTF-8 encoded BOM (which looks like this as a byte sequence: 0xef, +0xbb, 0xbf) is written. As it's rather improbable that any charmap +encoded file starts with these byte values (which would e.g. map to + + LATIN SMALL LETTER I WITH DIAERESIS + RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + INVERTED QUESTION MARK + +in iso-8859-1), this increases the probability that a utf-8-sig +encoding can be correctly guessed from the byte sequence. So here the +BOM is not used to be able to determine the byte order used for +generating the byte sequence, but as a signature that helps in +guessing the encoding. On encoding the utf-8-sig codec will write +0xef, 0xbb, 0xbf as the first three bytes to the file. On decoding +utf-8-sig will skip those three bytes if they appear as the first +three bytes in the file. + + \subsection{Standard Encodings\label{standard-encodings}} Python comes with a number of codecs builtin, either implemented as C @@ -890,6 +997,10 @@ {U8, UTF, utf8} {all languages} +\lineiii{utf_8_sig} + {} + {all languages} + \end{longtableiii} A number of codecs are specific to Python, so their codec names have @@ -1058,3 +1169,17 @@ \begin{funcdesc}{ToUnicode}{label} Convert a label to Unicode, as specified in \rfc{3490}. \end{funcdesc} + + \subsection{\module{encodings.utf_8_sig} --- + UTF-8 codec with BOM signature} +\declaremodule{standard}{encodings.utf-8-sig} % XXX utf_8_sig gives TeX errors +\modulesynopsis{UTF-8 codec with BOM signature} +\moduleauthor{Walter D\"orwald} + +\versionadded{2.5} + +This module implements a variant of the UTF-8 codec: On encoding a +UTF-8 encoded BOM will be prepended to the UTF-8 encoded bytes. For +the stateful encoder this is only done once (on the first write to the +byte stream).
For decoding an optional UTF-8 encoded BOM at the start +of the data will be skipped. Added: python/trunk/Lib/encodings/utf_8_sig.py ============================================================================== --- (empty file) +++ python/trunk/Lib/encodings/utf_8_sig.py Sun Jan 8 11:45:39 2006 @@ -0,0 +1,57 @@ +""" Python 'utf-8-sig' Codec +This work similar to UTF-8 with the following changes: + +* On encoding/writing a UTF-8 encoded BOM will be prepended/written as the + first three bytes. + +* On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these + bytes will be skipped. +""" +import codecs + +### Codec APIs + +def encode(input, errors='strict'): + return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input)) + +def decode(input, errors='strict'): + prefix = 0 + if input.startswith(codecs.BOM_UTF8): + input = input[3:] + prefix = 3 + (output, consumed) = codecs.utf_8_decode(input, errors, True) + return (output, consumed+prefix) + +class StreamWriter(codecs.StreamWriter): + def reset(self): + codecs.StreamWriter.reset(self) + try: + del self.encode + except AttributeError: + pass + + def encode(self, input, errors='strict'): + self.encode = codecs.utf_8_encode + return encode(input, errors) + +class StreamReader(codecs.StreamReader): + def reset(self): + codecs.StreamReader.reset(self) + try: + del self.decode + except AttributeError: + pass + + def decode(self, input, errors='strict'): + if len(input) < 3 and codecs.BOM_UTF8.startswith(input): + # not enough data to decide if this is a BOM + # => try again on the next call + return (u"", 0) + self.decode = codecs.utf_8_decode + return decode(input, errors) + +### encodings module API + +def getregentry(): + + return (encode,decode,StreamReader,StreamWriter) Modified: python/trunk/Lib/test/test_codecs.py ============================================================================== --- python/trunk/Lib/test/test_codecs.py (original) +++ python/trunk/Lib/test/test_codecs.py 
Sun Jan 8 11:45:39 2006 @@ -367,6 +367,33 @@ self.assertRaises(TypeError, codecs.charbuffer_encode) self.assertRaises(TypeError, codecs.charbuffer_encode, 42) +class UTF8SigTest(ReadTest): + encoding = "utf-8-sig" + + def test_partial(self): + self.check_partial( + u"\ufeff\x00\xff\u07ff\u0800\uffff", + [ + u"", + u"", + u"", # First BOM has been read and skipped + u"", + u"", + u"\ufeff", # Second BOM has been read and emitted + u"\ufeff\x00", # "\x00" read and emitted + u"\ufeff\x00", # First byte of encoded u"\xff" read + u"\ufeff\x00\xff", # Second byte of encoded u"\xff" read + u"\ufeff\x00\xff", # First byte of encoded u"\u07ff" read + u"\ufeff\x00\xff\u07ff", # Second byte of encoded u"\u07ff" read + u"\ufeff\x00\xff\u07ff", + u"\ufeff\x00\xff\u07ff", + u"\ufeff\x00\xff\u07ff\u0800", + u"\ufeff\x00\xff\u07ff\u0800", + u"\ufeff\x00\xff\u07ff\u0800", + u"\ufeff\x00\xff\u07ff\u0800\uffff", + ] + ) + class EscapeDecodeTest(unittest.TestCase): def test_empty(self): self.assertEquals(codecs.escape_decode(""), ("", 0)) @@ -1044,6 +1071,7 @@ UTF16LETest, UTF16BETest, UTF8Test, + UTF8SigTest, UTF7Test, UTF16ExTest, ReadBufferTest, Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Sun Jan 8 11:45:39 2006 @@ -319,6 +319,8 @@ Library ------- +- Patch #1177307: Added a new codec utf_8_sig for UTF-8 with a BOM signature. + - Patch #1157027: cookielib mishandles RFC 2109 cookies in Netscape mode - Patch #1117398: cookielib.LWPCookieJar and .MozillaCookieJar now raise @@ -674,7 +676,6 @@ Tests for sanity in tzname when HAVE_TZNAME defined were also defined. Closes bug #1096244. Thanks Gregory Bond. 
- C API ----- From python-checkins at python.org Sun Jan 8 11:48:54 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 8 Jan 2006 11:48:54 +0100 (CET) Subject: [Python-checkins] r41978 - python/trunk/Lib/distutils/command/upload.py Message-ID: <20060108104854.C9FD81E4002@bag.python.org> Author: martin.v.loewis Date: Sun Jan 8 11:48:54 2006 New Revision: 41978 Modified: python/trunk/Lib/distutils/command/upload.py Log: Patch #1299675: Pass metadata in upload. Modified: python/trunk/Lib/distutils/command/upload.py ============================================================================== --- python/trunk/Lib/distutils/command/upload.py (original) +++ python/trunk/Lib/distutils/command/upload.py Sun Jan 8 11:48:54 2006 @@ -70,17 +70,41 @@ spawn(("gpg", "--detach-sign", "-a", filename), dry_run=self.dry_run) - # Fill in the data + # Fill in the data - send all the meta-data in case we need to + # register a new release content = open(filename,'rb').read() + meta = self.distribution.metadata data = { - ':action':'file_upload', - 'protcol_version':'1', - 'name':self.distribution.get_name(), - 'version':self.distribution.get_version(), - 'content':(os.path.basename(filename),content), - 'filetype':command, - 'pyversion':pyversion, - 'md5_digest':md5(content).hexdigest(), + # action + ':action': 'file_upload', + 'protcol_version': '1', + + # identify release + 'name': meta.get_name(), + 'version': meta.get_version(), + + # file content + 'content': (os.path.basename(filename),content), + 'filetype': command, + 'pyversion': pyversion, + 'md5_digest': md5(content).hexdigest(), + + # additional meta-data + 'metadata_version' : '1.0', + 'summary': meta.get_description(), + 'home_page': meta.get_url(), + 'author': meta.get_contact(), + 'author_email': meta.get_contact_email(), + 'license': meta.get_licence(), + 'description': meta.get_long_description(), + 'keywords': meta.get_keywords(), + 'platform': meta.get_platforms(), + 'classifiers': 
meta.get_classifiers(), + 'download_url': meta.get_download_url(), + # PEP 314 + 'provides': meta.get_provides(), + 'requires': meta.get_requires(), + 'obsoletes': meta.get_obsoletes(), } comment = '' if command == 'bdist_rpm': From python-checkins at python.org Sun Jan 8 15:32:25 2006 From: python-checkins at python.org (reinhold.birkenfeld) Date: Sun, 8 Jan 2006 15:32:25 +0100 (CET) Subject: [Python-checkins] r41979 - python/trunk/Lib/test/outstanding_bugs.py Message-ID: <20060108143225.DBC341E4002@bag.python.org> Author: reinhold.birkenfeld Date: Sun Jan 8 15:32:19 2006 New Revision: 41979 Added: python/trunk/Lib/test/outstanding_bugs.py Log: Add a test file (which isn't run by regrtest) for bugs which aren't fixed yet. Includes a first test (for compiler). Added: python/trunk/Lib/test/outstanding_bugs.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/outstanding_bugs.py Sun Jan 8 15:32:19 2006 @@ -0,0 +1,24 @@ +# +# This file is for everybody to add tests for bugs that aren't +# fixed yet. Please add a test case and appropriate bug description. +# +# When you fix one of the bugs, please move the test to the correct +# test_ module. 
+# + +import unittest +from test import test_support + +class TestBug1385040(unittest.TestCase): + def testSyntaxError(self): + import compiler + + # The following snippet gives a SyntaxError in the interpreter + # + # If you compile and exec it, the call foo(7) returns (7, 1) + self.assertRaises(SyntaxError, compiler.compile, + "def foo(a=1, b): return a, b\n\n", "", "exec") + + +def test_main(): + test_support.run_unittest(TestBug1385040) From python-checkins at python.org Sun Jan 8 22:12:10 2006 From: python-checkins at python.org (reinhold.birkenfeld) Date: Sun, 8 Jan 2006 22:12:10 +0100 (CET) Subject: [Python-checkins] r41980 - peps/trunk/pep-0000.txt peps/trunk/pep-0340.txt peps/trunk/pep-0341.txt Message-ID: <20060108211210.40A9C1E4002@bag.python.org> Author: reinhold.birkenfeld Date: Sun Jan 8 22:12:09 2006 New Revision: 41980 Modified: peps/trunk/pep-0000.txt peps/trunk/pep-0340.txt peps/trunk/pep-0341.txt Log: s/Birkenfeld/Brandl/ Modified: peps/trunk/pep-0000.txt ============================================================================== --- peps/trunk/pep-0000.txt (original) +++ peps/trunk/pep-0000.txt Sun Jan 8 22:12:09 2006 @@ -66,7 +66,7 @@ SA 308 Conditional Expressions GvR, Hettinger SA 328 Imports: Multi-Line and Absolute/Relative Aahz - SA 341 Unifying try-except and try-finally Birkenfeld + SA 341 Unifying try-except and try-finally Brandl SA 342 Coroutines via Enhanced Generators GvR, Eby Open PEPs (under consideration) @@ -388,7 +388,7 @@ S 338 Executing modules inside packages with '-m' Coghlan I 339 How to Change CPython's Bytecode Cannon SR 340 Anonymous Block Statements GvR - SA 341 Unifying try-except and try-finally Birkenfeld + SA 341 Unifying try-except and try-finally Brandl SA 342 Coroutines via Enhanced Generators GvR, Eby S 343 Anonymous Block Redux and Generator Enhancements GvR S 344 Exception Chaining and Embedded Tracebacks Yee @@ -431,7 +431,7 @@ Batista, Facundo facundo at taniquetil.com.ar Baxter, Anthony anthony at 
interlink.com.au Bellman, Thomas bellman+pep-divmod at lysator.liu.se - Birkenfeld, Reinhold reinhold-birkenfeld-nospam at wolke7.net + Brandl, Georg g.brandl at gmx.net Cannon, Brett brett at python.org Carlson, Josiah jcarlson at uci.edu Carroll, W Isaac icarroll at pobox.com Modified: peps/trunk/pep-0340.txt ============================================================================== --- peps/trunk/pep-0340.txt (original) +++ peps/trunk/pep-0340.txt Sun Jan 8 22:12:09 2006 @@ -544,7 +544,7 @@ Booth, Fredrik Lundh, Greg Ewing, Holger Krekel, Jason Diamond, Jim Jewett, Josiah Carlson, Ka-Ping Yee, Michael Chermside, Michael Hudson, Neil Schemenauer, Nick Coghlan, Paul Moore, - Phillip Eby, Raymond Hettinger, Reinhold Birkenfeld, Samuele + Phillip Eby, Raymond Hettinger, Georg Brandl, Samuele Pedroni, Shannon Behrens, Skip Montanaro, Steven Bethard, Terry Reedy, Tim Delaney, Aahz, and others. Thanks all for the valuable contributions! Modified: peps/trunk/pep-0341.txt ============================================================================== --- peps/trunk/pep-0341.txt (original) +++ peps/trunk/pep-0341.txt Sun Jan 8 22:12:09 2006 @@ -2,7 +2,7 @@ Title: Unifying try-except and try-finally Version: $Revision$ Last-Modified: $Date$ -Author: Reinhold Birkenfeld +Author: Georg Brandl Status: Accepted Type: Standards Track Content-Type: text/plain From python-checkins at python.org Sun Jan 8 22:13:38 2006 From: python-checkins at python.org (reinhold.birkenfeld) Date: Sun, 8 Jan 2006 22:13:38 +0100 (CET) Subject: [Python-checkins] r41981 - python/trunk/Misc/ACKS Message-ID: <20060108211338.6E4D11E4011@bag.python.org> Author: reinhold.birkenfeld Date: Sun Jan 8 22:13:38 2006 New Revision: 41981 Modified: python/trunk/Misc/ACKS Log: Add myself. 
Modified: python/trunk/Misc/ACKS ============================================================================== --- python/trunk/Misc/ACKS (original) +++ python/trunk/Misc/ACKS Sun Jan 8 22:13:38 2006 @@ -74,6 +74,7 @@ Eric Bouck Thierry Bousch Monty Brandenberg +Georg Brandl Terrence Brannon Dave Brennan Tom Bridgman From python-checkins at python.org Mon Jan 9 07:24:36 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 9 Jan 2006 07:24:36 +0100 (CET) Subject: [Python-checkins] r41982 - in python/trunk: Lib/test/test_curses.py Misc/NEWS Modules/_curses_panel.c Message-ID: <20060109062436.F0D9A1E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 9 07:24:35 2006 New Revision: 41982 Modified: python/trunk/Lib/test/test_curses.py python/trunk/Misc/NEWS python/trunk/Modules/_curses_panel.c Log: Bug #1400115, Fix segfault when calling curses.panel.userptr() without prior setting of the userptr. Will backport. Modified: python/trunk/Lib/test/test_curses.py ============================================================================== --- python/trunk/Lib/test/test_curses.py (original) +++ python/trunk/Lib/test/test_curses.py Mon Jan 9 07:24:35 2006 @@ -9,6 +9,7 @@ # import curses, sys, tempfile, os +import curses.panel # Optionally test curses module. 
This currently requires that the # 'curses' resource be given on the regrtest command line using the -u @@ -213,12 +214,22 @@ print 'curses.unctrl fails on character', repr(ch) +def test_userptr_without_set(stdscr): + w = curses.newwin(10, 10) + p = curses.panel.new_panel(w) + # try to access userptr() before calling set_userptr() -- segfaults + try: + p.userptr() + raise RuntimeError, 'userptr should fail since not set' + except curses.panel.error: + pass def main(stdscr): curses.savetty() try: module_funcs(stdscr) window_funcs(stdscr) + test_userptr_without_set(stdscr) finally: curses.resetty() Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Mon Jan 9 07:24:35 2006 @@ -209,6 +209,9 @@ Extension Modules ----------------- +- Bug #1400115, Fix segfault when calling curses.panel.userptr() + without prior setting of the userptr. + - Fix 64-bit problems in bsddb. - Patch #1365916: fix some unsafe 64-bit mmap methods. 
Modified: python/trunk/Modules/_curses_panel.c ============================================================================== --- python/trunk/Modules/_curses_panel.c (original) +++ python/trunk/Modules/_curses_panel.c Mon Jan 9 07:24:35 2006 @@ -299,6 +299,11 @@ PyObject *obj; PyCursesInitialised; obj = (PyObject *) panel_userptr(self->pan); + if (obj == NULL) { + PyErr_SetString(PyCursesError, "no userptr set"); + return NULL; + } + Py_INCREF(obj); return obj; } From python-checkins at python.org Mon Jan 9 07:29:17 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 9 Jan 2006 07:29:17 +0100 (CET) Subject: [Python-checkins] r41983 - in python/branches/release24-maint: Lib/test/test_curses.py Misc/NEWS Modules/_curses_panel.c Message-ID: <20060109062917.5549F1E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 9 07:29:16 2006 New Revision: 41983 Modified: python/branches/release24-maint/Lib/test/test_curses.py python/branches/release24-maint/Misc/NEWS python/branches/release24-maint/Modules/_curses_panel.c Log: Backport: Bug #1400115, Fix segfault when calling curses.panel.userptr() without prior setting of the userptr. Modified: python/branches/release24-maint/Lib/test/test_curses.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_curses.py (original) +++ python/branches/release24-maint/Lib/test/test_curses.py Mon Jan 9 07:29:16 2006 @@ -9,6 +9,7 @@ # import curses, sys, tempfile, os +import curses.panel # Optionally test curses module. 
This currently requires that the # 'curses' resource be given on the regrtest command line using the -u @@ -213,12 +214,22 @@ print 'curses.unctrl fails on character', repr(ch) +def test_userptr_without_set(stdscr): + w = curses.newwin(10, 10) + p = curses.panel.new_panel(w) + # try to access userptr() before calling set_userptr() -- segfaults + try: + p.userptr() + raise RuntimeError, 'userptr should fail since not set' + except curses.panel.error: + pass def main(stdscr): curses.savetty() try: module_funcs(stdscr) window_funcs(stdscr) + test_userptr_without_set(stdscr) finally: curses.resetty() Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Mon Jan 9 07:29:16 2006 @@ -193,6 +193,9 @@ Extension Modules ----------------- +- Bug #1400115, Fix segfault when calling curses.panel.userptr() + without prior setting of the userptr. + - Bug #1346533, select.poll() doesn't raise an error if timeout > sys.maxint - Fix memory leak in posix.access(). 
Modified: python/branches/release24-maint/Modules/_curses_panel.c ============================================================================== --- python/branches/release24-maint/Modules/_curses_panel.c (original) +++ python/branches/release24-maint/Modules/_curses_panel.c Mon Jan 9 07:29:16 2006 @@ -299,6 +299,11 @@ PyObject *obj; PyCursesInitialised; obj = (PyObject *) panel_userptr(self->pan); + if (obj == NULL) { + PyErr_SetString(PyCursesError, "no userptr set"); + return NULL; + } + Py_INCREF(obj); return obj; } From python-checkins at python.org Mon Jan 9 08:07:13 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 9 Jan 2006 08:07:13 +0100 (CET) Subject: [Python-checkins] r41984 - in python/trunk: Misc/NEWS configure configure.in Message-ID: <20060109070713.CF9F41E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 9 08:07:12 2006 New Revision: 41984 Modified: python/trunk/Misc/NEWS python/trunk/configure python/trunk/configure.in Log: Fix bugs #1244610, #1392915, fix build problem on OpenBSD 3.7 and 3.8. configure would break checking curses.h. Will backport. Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Mon Jan 9 08:07:12 2006 @@ -12,6 +12,9 @@ Core and builtins ----------------- +- Bug #1244610, #1392915, fix build problem on OpenBSD 3.7 and 3.8. + configure would break checking curses.h. + - Bug #959576: The pwd module is now builtin. This allows Python to be built on UNIX platforms without $HOME set. Modified: python/trunk/configure ============================================================================== --- python/trunk/configure (original) +++ python/trunk/configure Mon Jan 9 08:07:12 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41953 . +# From configure.in Revision: 41975 . # Guess values for system-dependent variables and create Makefiles. 
# Generated by GNU Autoconf 2.59 for python 2.5. # @@ -1480,7 +1480,7 @@ # On OpenBSD, select(2) is not available if _XOPEN_SOURCE is defined, # even though select is a POSIX function. Reported by J. Ribbens. # Reconfirmed for OpenBSD 3.3 by Zachary Hamm, for 3.4 by Jason Ish. - OpenBSD/2.* | OpenBSD/3.[0123456]) + OpenBSD/2.* | OpenBSD/3.[012345678]) define_xopen_source=no;; # On Solaris 2.6, sys/wait.h is inconsistent in the usage # of union __?sigval. Reported by Stuart Bishop. @@ -3431,7 +3431,7 @@ ;; OSF*) LDLIBRARY='libpython$(VERSION).so' - BLDLIBRARY='-rpath $(LIBDIR) -L. -lpython$(VERSION)' + BLDLIBRARY='-rpath $(LIBDIR) -L. -lpython$(VERSION)' RUNSHARED=LD_LIBRARY_PATH=`pwd`:${LD_LIBRARY_PATH} ;; atheos*) Modified: python/trunk/configure.in ============================================================================== --- python/trunk/configure.in (original) +++ python/trunk/configure.in Mon Jan 9 08:07:12 2006 @@ -140,7 +140,7 @@ # On OpenBSD, select(2) is not available if _XOPEN_SOURCE is defined, # even though select is a POSIX function. Reported by J. Ribbens. # Reconfirmed for OpenBSD 3.3 by Zachary Hamm, for 3.4 by Jason Ish. - OpenBSD/2.* | OpenBSD/3.@<:@0123456@:>@) + OpenBSD/2.* | OpenBSD/3.@<:@012345678@:>@) define_xopen_source=no;; # On Solaris 2.6, sys/wait.h is inconsistent in the usage # of union __?sigval. Reported by Stuart Bishop. @@ -594,7 +594,7 @@ ;; OSF*) LDLIBRARY='libpython$(VERSION).so' - BLDLIBRARY='-rpath $(LIBDIR) -L. -lpython$(VERSION)' + BLDLIBRARY='-rpath $(LIBDIR) -L. 
-lpython$(VERSION)' RUNSHARED=LD_LIBRARY_PATH=`pwd`:${LD_LIBRARY_PATH} ;; atheos*) From python-checkins at python.org Mon Jan 9 08:11:00 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 9 Jan 2006 08:11:00 +0100 (CET) Subject: [Python-checkins] r41985 - in python/branches/release24-maint: Misc/NEWS configure configure.in Message-ID: <20060109071100.833B51E4039@bag.python.org> Author: neal.norwitz Date: Mon Jan 9 08:10:56 2006 New Revision: 41985 Modified: python/branches/release24-maint/Misc/NEWS python/branches/release24-maint/configure python/branches/release24-maint/configure.in Log: Backport: Fix bugs #1244610, #1392915, fix build problem on OpenBSD 3.7 and 3.8. configure would break checking curses.h. Also fix whitespace consistency which I forgot to mention in the head checkin. Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Mon Jan 9 08:10:56 2006 @@ -12,6 +12,9 @@ Core and builtins ----------------- +- Bug #1244610, #1392915, fix build problem on OpenBSD 3.7 and 3.8. + configure would break checking curses.h. + - Bug #959576: The pwd module is now builtin. This allows Python to be built on UNIX platforms without $HOME set. Modified: python/branches/release24-maint/configure ============================================================================== --- python/branches/release24-maint/configure (original) +++ python/branches/release24-maint/configure Mon Jan 9 08:10:56 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41853 . +# From configure.in Revision: 41976 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.4. # @@ -1480,7 +1480,7 @@ # On OpenBSD, select(2) is not available if _XOPEN_SOURCE is defined, # even though select is a POSIX function. Reported by J. Ribbens. 
# Reconfirmed for OpenBSD 3.3 by Zachary Hamm, for 3.4 by Jason Ish. - OpenBSD/2.* | OpenBSD/3.[0123456]) + OpenBSD/2.* | OpenBSD/3.[012345678]) define_xopen_source=no;; # On Solaris 2.6, sys/wait.h is inconsistent in the usage # of union __?sigval. Reported by Stuart Bishop. @@ -3431,7 +3431,7 @@ ;; OSF*) LDLIBRARY='libpython$(VERSION).so' - BLDLIBRARY='-rpath $(LIBDIR) -L. -lpython$(VERSION)' + BLDLIBRARY='-rpath $(LIBDIR) -L. -lpython$(VERSION)' RUNSHARED=LD_LIBRARY_PATH=`pwd`:${LD_LIBRARY_PATH} ;; atheos*) Modified: python/branches/release24-maint/configure.in ============================================================================== --- python/branches/release24-maint/configure.in (original) +++ python/branches/release24-maint/configure.in Mon Jan 9 08:10:56 2006 @@ -140,7 +140,7 @@ # On OpenBSD, select(2) is not available if _XOPEN_SOURCE is defined, # even though select is a POSIX function. Reported by J. Ribbens. # Reconfirmed for OpenBSD 3.3 by Zachary Hamm, for 3.4 by Jason Ish. - OpenBSD/2.* | OpenBSD/3.@<:@0123456@:>@) + OpenBSD/2.* | OpenBSD/3.@<:@012345678@:>@) define_xopen_source=no;; # On Solaris 2.6, sys/wait.h is inconsistent in the usage # of union __?sigval. Reported by Stuart Bishop. @@ -594,7 +594,7 @@ ;; OSF*) LDLIBRARY='libpython$(VERSION).so' - BLDLIBRARY='-rpath $(LIBDIR) -L. -lpython$(VERSION)' + BLDLIBRARY='-rpath $(LIBDIR) -L. 
-lpython$(VERSION)' RUNSHARED=LD_LIBRARY_PATH=`pwd`:${LD_LIBRARY_PATH} ;; atheos*) From python-checkins at python.org Mon Jan 9 08:29:04 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 9 Jan 2006 08:29:04 +0100 (CET) Subject: [Python-checkins] r41986 - python/trunk/Misc/README.OpenBSD Message-ID: <20060109072904.3EE7F1E4007@bag.python.org> Author: neal.norwitz Date: Mon Jan 9 08:29:03 2006 New Revision: 41986 Added: python/trunk/Misc/README.OpenBSD Log: Add some notes about a recurring problem with OpenBSD Added: python/trunk/Misc/README.OpenBSD ============================================================================== --- (empty file) +++ python/trunk/Misc/README.OpenBSD Mon Jan 9 08:29:03 2006 @@ -0,0 +1,38 @@ + +2005-01-08 + +If you have a problem building on OpenBSD and see output like this +while running configure: + +checking curses.h presence... yes +configure: WARNING: curses.h: present but cannot be compiled +configure: WARNING: curses.h: check for missing prerequisite headers? +configure: WARNING: curses.h: see the Autoconf documentation +configure: WARNING: curses.h: section "Present But Cannot Be Compiled" +configure: WARNING: curses.h: proceeding with the preprocessor's result +configure: WARNING: curses.h: in the future, the compiler will take precedence + +there is likely a problem that will prevent building python. +If you see the messages above and are able to completely build python, +please tell python-dev at python.org indicating your version of OpenBSD +and any other relevant system configuration. + +The build error that occurs while making may look something like this: + + /usr/include/sys/event.h:53: error: syntax error before "u_int" + /usr/include/sys/event.h:55: error: syntax error before "u_short" + +To fix this problem, you will probably need to update Python's configure +script to disable certain options. 
Search for a line that looks like: + + OpenBSD/2.* | OpenBSD/3.@<:@012345678@:>@) + +If your version is not in that list, e.g., 3.9, add the version +number. In this case, you would just need to add a 9 after the 8. +If you modify configure.in, you will need to regenerate configure +with autoconf. + +If your version is already in the list, this is not a known problem. +Please submit a bug report here: + + http://sourceforge.net/tracker/?group_id=5470&atid=105470 From python-checkins at python.org Mon Jan 9 13:45:05 2006 From: python-checkins at python.org (walter.doerwald) Date: Mon, 9 Jan 2006 13:45:05 +0100 (CET) Subject: [Python-checkins] r41987 - python/trunk/Doc/lib/libcodecs.tex Message-ID: <20060109124505.B9DFE1E400A@bag.python.org> Author: walter.doerwald Date: Mon Jan 9 13:45:01 2006 New Revision: 41987 Modified: python/trunk/Doc/lib/libcodecs.tex Log: Fix typos. Modified: python/trunk/Doc/lib/libcodecs.tex ============================================================================== --- python/trunk/Doc/lib/libcodecs.tex (original) +++ python/trunk/Doc/lib/libcodecs.tex Mon Jan 9 13:45:01 2006 @@ -546,7 +546,7 @@ that choose a different subset of all unicode code points and how these codepoints are mapped to the bytes 0x0-0xff. To see how this is done simply open e.g. encodings/cp1252.py (which is an encoding that -is used primarily on Windows). There's string constant with 256 +is used primarily on Windows). There's a string constant with 256 characters that shows you which character is mapped to which byte value. @@ -584,7 +584,7 @@ byte sequence consists of two parts: Marker bits (the most significant bits) and payload bits. The marker bits are a sequence of zero to six 1 bits followed by a 0 bit. 
Unicode characters are encoded like this -(with x being a payload bit, which when concatenated give the Unicode +(with x being payload bits, which when concatenated give the Unicode character): \begin{tableii}{l|l}{textrm}{}{Range}{Encoding} @@ -608,7 +608,7 @@ encoding can decode any random byte sequence. However that's not possible with UTF-8, as UTF-8 byte sequences have a structure that doesn't allow arbitrary byte sequence. To increase the reliability -with which an UTF-8 encoding can be detected, Microsoft invented a +with which a UTF-8 encoding can be detected, Microsoft invented a variant of UTF-8 (that Python 2.5 calls "utf-8-sig") for its Notepad program: Before any of the Unicode characters is written to the file, a UTF-8 encoded BOM (which looks like this as a byte sequence: 0xef, From mal at egenix.com Mon Jan 9 13:53:53 2006 From: mal at egenix.com (M.-A. Lemburg) Date: Mon, 09 Jan 2006 13:53:53 +0100 Subject: [Python-checkins] r41971 - in python/branches/ssize_t: Include/abstract.h Include/bufferobject.h Include/listobject.h Include/pystrtod.h Include/stringobject.h Include/tupleobject.h Objects/abstract.c Objects/bufferobject.c Objects/classobject.c Objects/codeobject.c Objects/descrobject.c Objects/dictobject.c Objects/enumobject.c Objects/fileobject.c Objects/frameobject.c Objects/funcobject.c Objects/iterobject.c Objects/listobject.c Objects/methodobject.c Objects/object.c Objects/setobject.c Objects/stringobject.c Objects/structseq.c Objects/tupleobject.c Objects/typeobject.c Objects/weakrefobject.c Python/pystrtod.c In-Reply-To: <20060108054820.15F321E4002@bag.python.org> References: <20060108054820.15F321E4002@bag.python.org> Message-ID: <43C25CE1.4070106@egenix.com> neal.norwitz wrote: > Author: neal.norwitz > Date: Sun Jan 8 06:48:15 2006 > New Revision: 41971 > > Log: > Get rid of a bunch of warnings mostly by converting int -> Py_ssize_t > > Modified: python/branches/ssize_t/Include/abstract.h > 
============================================================================== > --- python/branches/ssize_t/Include/abstract.h (original) > +++ python/branches/ssize_t/Include/abstract.h Sun Jan 8 06:48:15 2006 > @@ -422,7 +422,7 @@ > PyAPI_FUNC(Py_ssize_t) PyObject_Length(PyObject *o); > #define PyObject_Length PyObject_Size > > - PyAPI_FUNC(int) _PyObject_LengthCue(PyObject *o); > + PyAPI_FUNC(Py_ssize_t) _PyObject_LengthCue(PyObject *o); > > /* > Return the size of object o. If the object, o, provides > @@ -513,7 +513,7 @@ > > PyAPI_FUNC(int) PyObject_AsWriteBuffer(PyObject *obj, > void **buffer, > - int *buffer_len); > + Py_ssize_t *buffer_len); > Changes like these (variables passed in by reference) should be handled more carefully and also discussed on python-dev. The reason is that these changes will introduce major changes in extensions using these APIs. We will also have to make sure that imports of extensions which have not been compiled against the new API fail to import - otherwise these extensions are bound to seg-fault, cause buffer overflows, etc. due to such changes in pointer references. This is especially important on 64-bit platforms such as AMD64 where int is 32 bits wide while Py_ssize_t is 64 bits wide. 
> /* > Takes an arbitrary object which must support the (writeable, > > Modified: python/branches/ssize_t/Include/bufferobject.h > ============================================================================== > --- python/branches/ssize_t/Include/bufferobject.h (original) > +++ python/branches/ssize_t/Include/bufferobject.h Sun Jan 8 06:48:15 2006 > @@ -17,15 +17,15 @@ > #define Py_END_OF_BUFFER (-1) > > PyAPI_FUNC(PyObject *) PyBuffer_FromObject(PyObject *base, > - int offset, Py_ssize_t size); > + Py_ssize_t offset, Py_ssize_t size); > PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteObject(PyObject *base, > - int offset, > - Py_ssize_t size); > + Py_ssize_t offset, > + Py_ssize_t size); > > PyAPI_FUNC(PyObject *) PyBuffer_FromMemory(void *ptr, Py_ssize_t size); > PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteMemory(void *ptr, Py_ssize_t size); > > -PyAPI_FUNC(PyObject *) PyBuffer_New(int size); > +PyAPI_FUNC(PyObject *) PyBuffer_New(Py_ssize_t size); > > #ifdef __cplusplus > } > > > > Modified: python/branches/ssize_t/Include/pystrtod.h > ============================================================================== > --- python/branches/ssize_t/Include/pystrtod.h (original) > +++ python/branches/ssize_t/Include/pystrtod.h Sun Jan 8 06:48:15 2006 > @@ -8,7 +8,7 @@ > > PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr); > PyAPI_FUNC(double) PyOS_ascii_atof(const char *str); > -PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, int buf_len, const char *format, double d); > +PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d); Shouldn't this by Py_ssize_t as well ?! 
> Modified: python/branches/ssize_t/Objects/setobject.c > ============================================================================== > --- python/branches/ssize_t/Objects/setobject.c (original) > +++ python/branches/ssize_t/Objects/setobject.c Sun Jan 8 06:48:15 2006 > @@ -51,8 +51,8 @@ > static setentry * > set_lookkey(PySetObject *so, PyObject *key, register long hash) > { > - register int i; > - register unsigned int perturb; > + register Py_ssize_t i; > + register size_t perturb; > register setentry *freeslot; > register unsigned int mask = so->mask; > setentry *table = so->table; > @@ -129,8 +129,8 @@ > static setentry * > set_lookkey_string(PySetObject *so, PyObject *key, register long hash) > { > - register int i; > - register unsigned int perturb; > + register Py_ssize_t i; > + register size_t perturb; > register setentry *freeslot; > register unsigned int mask = so->mask; > setentry *table = so->table; Same here. [Have skipped the rest] -- Marc-Andre Lemburg eGenix.com Professional Python Services directly from the Source (#1, Jan 09 2006) >>> Python/Zope Consulting and Support ... http://www.egenix.com/ >>> mxODBC.Zope.Database.Adapter ... http://zope.egenix.com/ >>> mxODBC, mxDateTime, mxTextTools ... http://python.egenix.com/ ________________________________________________________________________ ::: Try mxODBC.Zope.DA for Windows,Linux,Solaris,FreeBSD for free ! 
:::: From python-checkins at python.org Mon Jan 9 23:01:18 2006 From: python-checkins at python.org (fredrik.lundh) Date: Mon, 9 Jan 2006 23:01:18 +0100 (CET) Subject: [Python-checkins] r41988 - python/trunk/Doc/lib/xmldompulldom.tex Message-ID: <20060109220118.DB6951E4082@bag.python.org> Author: fredrik.lundh Date: Mon Jan 9 23:01:18 2006 New Revision: 41988 Modified: python/trunk/Doc/lib/xmldompulldom.tex Log: typo Modified: python/trunk/Doc/lib/xmldompulldom.tex ============================================================================== --- python/trunk/Doc/lib/xmldompulldom.tex (original) +++ python/trunk/Doc/lib/xmldompulldom.tex Mon Jan 9 23:01:18 2006 @@ -38,7 +38,7 @@ \begin{datadesc}{default_bufsize} - Default value for the \var{busize} parameter to \function{parse()}. + Default value for the \var{bufsize} parameter to \function{parse()}. \versionchanged[The value of this variable can be changed before calling \function{parse()} and the new value will take effect]{2.1} From martin at v.loewis.de Mon Jan 9 23:01:40 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Mon, 09 Jan 2006 23:01:40 +0100 Subject: [Python-checkins] r41971 - in python/branches/ssize_t: Include/abstract.h Include/bufferobject.h Include/listobject.h Include/pystrtod.h Include/stringobject.h Include/tupleobject.h Objects/abstract.c Objects/bufferobject.c Objects/classobject.c Objects/codeobject.c Objects/descrobject.c Objects/dictobject.c Objects/enumobject.c Objects/fileobject.c Objects/frameobject.c Objects/funcobject.c Objects/iterobject.c Objects/listobject.c Objects/methodobject.c Objects/object.c Objects/setobject.c Objects/stringobject.c Objects/structseq.c Objects/tupleobject.c Objects/typeobject.c Objects/weakrefobject.c Python/pystrtod.c In-Reply-To: <43C25CE1.4070106@egenix.com> References: <20060108054820.15F321E4002@bag.python.org> <43C25CE1.4070106@egenix.com> Message-ID: <43C2DD44.2040908@v.loewis.de> M.-A. 
Lemburg wrote: > Changes like these (variables passed in by reference) should be > handle more carefully and also discussed on python-dev. The change was discussed on python-dev; it is part of the PEP (which still awaits a number). Notice that the change is in the ssize_t-branch only so far. > The reason is that these changes will introduce major changes > in extensions using these APIs. I don't believe the change is major. It only affects a few extensions, and for these, it is only a minor change. A single line of changing will be enough. Regards, Martin From python-checkins at python.org Mon Jan 9 23:02:27 2006 From: python-checkins at python.org (fredrik.lundh) Date: Mon, 9 Jan 2006 23:02:27 +0100 (CET) Subject: [Python-checkins] r41989 - python/trunk/Doc/lib/libwinreg.tex Message-ID: <20060109220227.18F9C1E4002@bag.python.org> Author: fredrik.lundh Date: Mon Jan 9 23:02:26 2006 New Revision: 41989 Modified: python/trunk/Doc/lib/libwinreg.tex Log: fixed broken sentence Modified: python/trunk/Doc/lib/libwinreg.tex ============================================================================== --- python/trunk/Doc/lib/libwinreg.tex (original) +++ python/trunk/Doc/lib/libwinreg.tex Mon Jan 9 23:02:26 2006 @@ -25,9 +25,9 @@ Closes a previously opened registry key. The hkey argument specifies a previously opened key. - Note that if \var{hkey} is not closed using this method, (or the - \method{handle.Close()} closed when the \var{hkey} object is - destroyed by Python. + Note that if \var{hkey} is not closed using this method (or via + \method{handle.Close()}), it is closed when the \var{hkey} object + is destroyed by Python. 
\end{funcdesc} From python-checkins at python.org Mon Jan 9 23:36:59 2006 From: python-checkins at python.org (georg.brandl) Date: Mon, 9 Jan 2006 23:36:59 +0100 (CET) Subject: [Python-checkins] r41990 - python/trunk/Lib/test/test_builtin.py Message-ID: <20060109223659.059A81E4007@bag.python.org> Author: georg.brandl Date: Mon Jan 9 23:36:58 2006 New Revision: 41990 Modified: python/trunk/Lib/test/test_builtin.py Log: Correct test_builtin locale handling. Modified: python/trunk/Lib/test/test_builtin.py ============================================================================== --- python/trunk/Lib/test/test_builtin.py (original) +++ python/trunk/Lib/test/test_builtin.py Mon Jan 9 23:36:58 2006 @@ -557,7 +557,7 @@ # set locale to something that doesn't use '.' for the decimal point try: import locale - orig_locale = locale.setlocale(locale.LC_NUMERIC, '') + orig_locale = locale.setlocale(locale.LC_NUMERIC) locale.setlocale(locale.LC_NUMERIC, 'fr_FR') except: # if we can't set the locale, just ignore this test From python-checkins at python.org Mon Jan 9 23:45:37 2006 From: python-checkins at python.org (fredrik.lundh) Date: Mon, 9 Jan 2006 23:45:37 +0100 (CET) Subject: [Python-checkins] r41991 - python/trunk/Doc/lib/libwinreg.tex Message-ID: <20060109224537.833E81E4034@bag.python.org> Author: fredrik.lundh Date: Mon Jan 9 23:45:34 2006 New Revision: 41991 Modified: python/trunk/Doc/lib/libwinreg.tex Log: added a couple of missing periods. Modified: python/trunk/Doc/lib/libwinreg.tex ============================================================================== --- python/trunk/Doc/lib/libwinreg.tex (original) +++ python/trunk/Doc/lib/libwinreg.tex Mon Jan 9 23:45:34 2006 @@ -60,7 +60,7 @@ be \code{None}. In that case, the handle returned is the same key handle passed in to the function. - If the key already exists, this function opens the existing key + If the key already exists, this function opens the existing key. 
The return value is the handle of the opened key. If the function fails, an \exception{EnvironmentError} exception is @@ -164,7 +164,7 @@ \var{key} is an already open key, or any of the predefined \constant{HKEY_*} constants. - \var{sub_key} is a string that identifies the sub_key to load + \var{sub_key} is a string that identifies the sub_key to load. \var {file_name} is the name of the file to load registry data from. This file must have been created with the \function{SaveKey()} function. @@ -192,14 +192,14 @@ \var{key} is an already open key, or any one of the predefined \constant{HKEY_*} constants. - \var{sub_key} is a string that identifies the sub_key to open + \var{sub_key} is a string that identifies the sub_key to open. \var{res} is a reserved integer, and must be zero. The default is zero. \var{sam} is an integer that specifies an access mask that describes the desired security access for the key. Default is \constant{KEY_READ} - The result is a new handle to the specified key + The result is a new handle to the specified key. If the function fails, \exception{EnvironmentError} is raised. 
\end{funcdesc} From python-checkins at python.org Tue Jan 10 00:14:56 2006 From: python-checkins at python.org (andrew.kuchling) Date: Tue, 10 Jan 2006 00:14:56 +0100 (CET) Subject: [Python-checkins] r41992 - sandbox/trunk/pycon sandbox/trunk/pycon/get.sh sandbox/trunk/pycon/parse-sched.py sandbox/trunk/pycon/talks.py Message-ID: <20060109231456.175521E4050@bag.python.org> Author: andrew.kuchling Date: Tue Jan 10 00:14:54 2006 New Revision: 41992 Added: sandbox/trunk/pycon/ sandbox/trunk/pycon/get.sh sandbox/trunk/pycon/parse-sched.py (contents, props changed) sandbox/trunk/pycon/talks.py Log: Add PyCon scheduling script Added: sandbox/trunk/pycon/get.sh ============================================================================== --- (empty file) +++ sandbox/trunk/pycon/get.sh Tue Jan 10 00:14:54 2006 @@ -0,0 +1,6 @@ +#!/bin/sh + +wget -U "Mozilla/1.0 (GNU w-get)" -O - \ + http://wiki.python.org/moin/PyCon2006/FinalSchedule?action=raw \ + >FinalSchedule + Added: sandbox/trunk/pycon/parse-sched.py ============================================================================== --- (empty file) +++ sandbox/trunk/pycon/parse-sched.py Tue Jan 10 00:14:54 2006 @@ -0,0 +1,230 @@ +#!/usr/bin/python + +# Reads a page with a wiki-format table + +# Basic data structure of dictionary: +# {(year, month, day) -> [(time, duration, title)] + +import sys, optparse +import re, string +import pprint, cgi +import datetime + +import talks + +date_pat = re.compile('^=\s+(\d{4})-(\d{2})-(\d{1,2})\s+.*\s+=') +line_pat = re.compile('[|]{2}.*[|]{2}\s*$') +talk_pat = re.compile('#(\d+)') + +def parse (): + lines = sys.stdin.readlines() + lines = map(string.strip, lines) + d = {} + date = None + + for line in lines: + m = date_pat.match(line) + if m: + date = [int(value) for value in m.group(1,2,3)] + date = tuple(date) + + m = line_pat.match(line) + if m: + if date is None: + print >>sys.stderr, "Table line before date header: %r" % line + else: + L = line.split('||') + L = 
map(string.strip, L) + assert L[0] == '' + assert L[-1] == '' + L = L[1:-1] + if len(L) != 4: + print >>sys.stderr, "Wrong number of fields in line: %r" % line + # Skip headers + elif L[0].lower() == 'room': + pass + else: + event_list = d.setdefault(date, []) + t = canonicalize_presentation(L) + event_list.append(t) + + return d + + +time_pat = re.compile('(\d+):(\d+)') + +def parse_time (S): + """(str): (int, int) + Parse a time into an (hour, minute) tuple. + """ + m = time_pat.match(S) + assert m is not None + hour, min = int(m.group(1)), int(m.group(2)) + return hour, min + + +def canonicalize_presentation (L): + """Take a 4-item list for a talk and perform various + corrections to it. + """ + hour, min = parse_time(L[1]) + L[1] = '%02i:%02i' % (hour, min) + L[2] = int(L[2]) + return tuple(L) + + + +# +# HTML generation functions +# + +def add_time (start_time, duration): + hour, min = parse_time(start_time) + while duration > 60: + hour += 1 + duration -= 60 + min += duration + while min >= 60: + hour += 1 + min -= 60 + return '%02i:%02i' % (hour, min) + +def find_next_time (time_list, end_time): + if len(time_list) == 0: + return 1 + span = 0 + while (time_list[span][0] < end_time and span < len(time_list)): + span += 1 + return span+1 + + +def format_day (day, output): + # Figure out unique rooms + rooms = [] + for room, time, duration, title in day: + # XXX change to use regex pattern + if room.startswith('-'): + continue + if room not in rooms: + rooms.append(room) + rooms.sort() + + # Move Bent Tree to be last + if 'Bent Tree' in rooms: + rooms.remove('Bent Tree') + rooms.append('Bent Tree') + + # Print room header + print >>output, ''' ++''' % len(rooms) + + print >>output, "" + print >>output, '', + for room in rooms: + print >>output, '' % room, + print >>output, '' + print >>output, "" + + # Sort list + time_dict = {} + for room, time, duration, title in day: + d = time_dict.setdefault(time, {}) + d[room] = (duration, title) + time_list = 
time_dict.items() + time_list.sort() + + active = {} + print '' + while len(time_list) > 0: + time, room_dict = time_list.pop(0) + print >>output, '', + for act_room, end_time in active.items(): + if end_time <= time: + del active[act_room] + print >>output, '' % time, + plenary = (len(room_dict) == 1) + if plenary: + # Plenary session of some sort + duration, title = room_dict.values()[0] + colspan = len(rooms) + print >>output, '' % (colspan, title), + print >>output, '' + continue + + for room in rooms: + # Room still occupied, so skip it + if room in active: + continue + + # New room + t = room_dict.get(room) + if t is None: + print >>output, '', + else: + duration, title = t + end_time = add_time(time, duration) + active[room] = end_time + rowspan = find_next_time(time_list, end_time) + + # Turn talk numbers into a link with a title + m = talk_pat.match(title) + if m is not None: + talk_num = int(m.group(1)) + title = talks.get_title(talk_num) + else: + title = cgi.escape(title) + + print >>output, '', + + print >>output, '' + + print '' + print >>output, '
    %s
    %s%s
     ' % rowspan, + print >>output, title, + print >>output, '
    ' + + +def output_html (d, output): + L = d.items() ; L.sort() + + for (y, m, day), day_data in L: + date = datetime.date(y, m, day) + print >>output, date.strftime('

    %A, %B %d %Y

    ') + format_day(day_data, output) + + +def main (): + parser = optparse.OptionParser(usage="usage: %prog [options] < final-schedule") + parser.add_option('--format', + type='choice', + choices=['pickle', 'python', 'print', 'html'], + default='print', + action="store", dest="format", + help = "Select output format") + options, args = parser.parse_args() + + d = parse() + fmt = options.format + if fmt == 'print': + pprint.pprint(d) + elif fmt == 'python': + print 'schedule =', + pprint.pprint(d) + elif fmt == 'pickle': + import cPickle + cPickle.dump(d, sys.stdout) + elif fmt == 'html': + output_html(d, sys.stdout) + else: + print >>sys.stderr, "Unknown format %r" % fmt + sys.exit(1) + +if __name__ == '__main__': + main() + + + + + + Added: sandbox/trunk/pycon/talks.py ============================================================================== --- (empty file) +++ sandbox/trunk/pycon/talks.py Tue Jan 10 00:14:54 2006 @@ -0,0 +1,70 @@ + +talk_dict = { + 1: 'Introduction to Pyparsing: An Object-oriented Easy-to-Use Toolkit for Building Recursive Descent Parsers', + 2: 'Agile Documentation: using tests as documentation', + 4: 'An Interactive Adventure Game Engine Built Using Pyparsing', + 5: 'Desktop Application Programming With PyGTK and Glade', + 6: 'Vertebral Fracture Analysis', + 8: 'Scripting .NET with IronPython', + 11: 'Effective AJAX with TurboGears', + 12: 'Stackless Python in EVE Online', + 13: "Cuaima MetaInstaller. 
New tool for managing System's Installations.", + 14: 'Osh: An Open-Source Python-Based Object-Oriented Shell', + 16: 'Decimal for beginners', + 17: 'Large-scale, cross-platform synchronization using embedded python', + 18: 'Internet Access via Anti-Virus Policy Enforcement Software and Messaging Service', + 19: 'State-of-the-art Python IDEs', + 21: 'Developing an Internationalized Application in Python: Chandler a case study', + 23: 'Processing XML with ElementTree', + 24: 'What is Nabu?', + 25: 'TurboGears Tutorial', + 26: 'Packaging Programs with py2exe', + 29: 'Python in Business : Thyme, a business-oriented Python development framework.', + 30: 'Python at Home : In Control', + 31: 'Extending the life of CVS with Python', + 34: 'The State of Dabo', + 35: 'PyPy architecture session', + 36: 'Python Can Survive In The Enterprise', + 37: 'Beyond Scripting: Using Python to create a medical information system with graphical template and database schema design', + 38: 'PyPy -- where we are now', + 39: 'The Rest Of The Web Stack', + 40: 'SAM: Transforming a commandline tool to Web 3000 (c)', + 41: 'Teaching Python - Anecdotes from the Field', + 45: 'Implementation of the Python Bytecode Compiler', + 46: 'Making Apples from Applesauce: The Evolution of cvs2svn', + 47: 'Agile open-source methods, Businesses and EU Funding', + 48: 'Extensible Desktop Applications: Abusing the Zope 3 Project', + 50: 'Using Django to supercharge Web development', + 52: 'New Tools for Testing Web Applications with Python', + 53: 'vobject - An iCalendar Library', + 54: 'Zanshin: Zen and the Art of Network Client Programming', + 55: 'IronPython Implementation', + 56: 'Python tools for regional hydrologic modeling in South Florida', + 57: 'pysense: Humanoid Robots, a Wearable System, and Python', + 58: 'Introduction to Zope Application Development', + 59: 'Introduction to CMF Application Development', + 60: 'Mission-Critical Python and the Brave New Web', + 62: 'Docutils Developers Tutorial: 
Architecture, Extending, and Embedding', + 63: 'Django tutorial', + 64: 'Creating Presentations With Docutils and S5', + 65: 'Understanding Unicode', + 66: 'Building Pluggable Software with Eggs', + 67: 'Gamma: An Atom Publishing Protocol implementation for Zope 3', + 69: 'Python in Your Pocket: Python for Series 60', + 70: 'Simplifying Red-Black Trees', + 71: 'entransit, a content deployment system', + 72: 'bazaar-ng distributed version control', + 73: 'State of Zope', + } + +import cgi + +def get_title (num): + title = talk_dict.get(num) + if title is None: + return '#' + str(num) + + url = 'http://wiki.python.org/moin/PyCon2006/Talks#' + str(num) + title = cgi.escape(title) + return '%s' % (url, title) + From python-checkins at python.org Tue Jan 10 00:42:12 2006 From: python-checkins at python.org (andrew.kuchling) Date: Tue, 10 Jan 2006 00:42:12 +0100 (CET) Subject: [Python-checkins] r41993 - sandbox/trunk/pycon/README Message-ID: <20060109234212.66B461E400B@bag.python.org> Author: andrew.kuchling Date: Tue Jan 10 00:42:07 2006 New Revision: 41993 Added: sandbox/trunk/pycon/README Log: Add README Added: sandbox/trunk/pycon/README ============================================================================== --- (empty file) +++ sandbox/trunk/pycon/README Tue Jan 10 00:42:07 2006 @@ -0,0 +1,23 @@ + +This directory contains a script for automatically generating +the schedule for PyCon. More instructions are in the wiki at +. + +get.sh Fetches the source of the PyCon2006/FinalSchedule wiki page + and saves it as FinalSchedule. + (Uses GNU wget) +parse-sched.py Reads the source of the FinalSchedule page from stdin, + and can output the schedule as HTML or a Python + data structure. + +Run get.sh once; then invoke the script with something like: + + ./parse-sched.py --format=html <FinalSchedule >schedule.html + +Please feel free to submit patches that add new output formats. If you +have commit privileges, feel free to just check in changes that add +new formats. + + +A.M. 
Kuchling +amk at amk.ca From python-checkins at python.org Tue Jan 10 04:49:23 2006 From: python-checkins at python.org (phillip.eby) Date: Tue, 10 Jan 2006 04:49:23 +0100 (CET) Subject: [Python-checkins] r41994 - in sandbox/trunk/setuptools: pkg_resources.py pkg_resources.txt setuptools/tests/test_resources.py Message-ID: <20060110034923.2982C1E4002@bag.python.org> Author: phillip.eby Date: Tue Jan 10 04:49:22 2006 New Revision: 41994 Modified: sandbox/trunk/setuptools/pkg_resources.py sandbox/trunk/setuptools/pkg_resources.txt sandbox/trunk/setuptools/setuptools/tests/test_resources.py Log: ``safe_name()`` now allows dots in project names, and there is a new ``to_filename()`` function that escapes project names and versions for safe use in constructing egg filenames from a Distribution object's metadata. Note that allowing dots may now cause problems for projects with '.' in the name that were previously installed, since such projects had to be spelled with a '-' before. The '-' name will no longer match the '.' project, and there is no real room for backward compatibility here. :( Modified: sandbox/trunk/setuptools/pkg_resources.py ============================================================================== --- sandbox/trunk/setuptools/pkg_resources.py (original) +++ sandbox/trunk/setuptools/pkg_resources.py Tue Jan 10 04:49:22 2006 @@ -61,7 +61,7 @@ # Parsing functions and string utilities 'parse_requirements', 'parse_version', 'safe_name', 'safe_version', 'get_platform', 'compatible_platforms', 'yield_lines', 'split_sections', - 'safe_extra', + 'safe_extra', 'to_filename', # filesystem utilities 'ensure_directory', 'normalize_path', @@ -821,9 +821,9 @@ def safe_name(name): """Convert an arbitrary string to a standard distribution name - Any runs of non-alphanumeric characters are replaced with a single '-'. + Any runs of non-alphanumeric/. characters are replaced with a single '-'. 
""" - return re.sub('[^A-Za-z0-9]+', '-', name) + return re.sub('[^A-Za-z0-9.]+', '-', name) def safe_version(version): @@ -842,15 +842,15 @@ Any runs of non-alphanumeric characters are replaced with a single '_', and the result is always lowercased. """ - return re.sub('[^A-Za-z0-9]+', '_', extra).lower() - - - - - + return re.sub('[^A-Za-z0-9.]+', '_', extra).lower() +def to_filename(name): + """Convert a project or version name to its filename-escaped form + Any '-' characters are currently replaced with '_'. + """ + return name.replace('-','_') @@ -1529,7 +1529,7 @@ LINE_END = re.compile(r"\s*(#.*)?$").match # whitespace and comment CONTINUE = re.compile(r"\s*\\\s*(#.*)?$").match # line continuation -DISTRO = re.compile(r"\s*((\w|-)+)").match # Distribution or option +DISTRO = re.compile(r"\s*((\w|[-.])+)").match # Distribution or extra VERSION = re.compile(r"\s*(<=?|>=?|==|!=)\s*((\w|[-.])+)").match # ver. info COMMA = re.compile(r"\s*,").match # comma between items OBRACKET = re.compile(r"\s*\[").match @@ -1846,7 +1846,7 @@ def egg_name(self): """Return what this distribution's standard .egg filename should be""" filename = "%s-%s-py%s" % ( - self.project_name.replace('-','_'), self.version.replace('-','_'), + to_filename(self.project_name), to_filename(self.version), self.py_version or PY_MAJOR ) Modified: sandbox/trunk/setuptools/pkg_resources.txt ============================================================================== --- sandbox/trunk/setuptools/pkg_resources.txt (original) +++ sandbox/trunk/setuptools/pkg_resources.txt Tue Jan 10 04:49:22 2006 @@ -1445,6 +1445,12 @@ similar to ``safe_name()`` except that non-alphanumeric runs are replaced by a single underbar (``_``), and the result is lowercased. +``to_filename(name_or_version)`` + Escape a name or version string so it can be used in a dash-separated + filename (or ``#egg=name-version`` tag) without ambiguity. 
You + should only pass in values that were returned by ``safe_name()`` or + ``safe_version()``. + Platform Utilities ------------------ @@ -1511,6 +1517,13 @@ Release Notes/Change History ---------------------------- +0.6a10 + * ``safe_name()`` now allows dots in project names. + + * There is a new ``to_filename()`` function that escapes project names and + versions for safe use in constructing egg filenames from a Distribution + object's metadata. + 0.6a9 * Don't raise an error when an invalid (unfinished) distribution is found unless absolutely necessary. Warn about skipping invalid/unfinished eggs Modified: sandbox/trunk/setuptools/setuptools/tests/test_resources.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/tests/test_resources.py (original) +++ sandbox/trunk/setuptools/setuptools/tests/test_resources.py Tue Jan 10 04:49:22 2006 @@ -406,7 +406,7 @@ self.assertEqual(safe_name("WSGI Utils"), "WSGI-Utils") self.assertEqual(safe_name("WSGI Utils"), "WSGI-Utils") self.assertEqual(safe_name("Money$$$Maker"), "Money-Maker") - self.assertEqual(safe_name("peak.web"), "peak-web") + self.assertNotEqual(safe_name("peak.web"), "peak-web") def testSafeVersion(self): self.assertEqual(safe_version("1.2-1"), "1.2-1") From python-checkins at python.org Tue Jan 10 05:00:56 2006 From: python-checkins at python.org (phillip.eby) Date: Tue, 10 Jan 2006 05:00:56 +0100 (CET) Subject: [Python-checkins] r41995 - in sandbox/trunk/setuptools: EasyInstall.txt setuptools/command/easy_install.py setuptools/command/egg_info.py setuptools/package_index.py Message-ID: <20060110040056.E20401E4002@bag.python.org> Author: phillip.eby Date: Tue Jan 10 05:00:54 2006 New Revision: 41995 Modified: sandbox/trunk/setuptools/EasyInstall.txt sandbox/trunk/setuptools/setuptools/command/easy_install.py sandbox/trunk/setuptools/setuptools/command/egg_info.py sandbox/trunk/setuptools/setuptools/package_index.py Log: EasyInstall can 
now download bare ``.py`` files and wrap them in an egg, as long as you include an ``#egg=name-version`` suffix on the URL, or if the ``.py`` file is listed as the "Download URL" on the project's PyPI page. This allows third parties to "package" trivial Python modules just by linking to them (e.g. from within their own PyPI page or download links page). Modified: sandbox/trunk/setuptools/EasyInstall.txt ============================================================================== --- sandbox/trunk/setuptools/EasyInstall.txt (original) +++ sandbox/trunk/setuptools/EasyInstall.txt Tue Jan 10 05:00:54 2006 @@ -968,6 +968,13 @@ normalized form of the name was used, which could lead to unnecessary full-index searches when a project's name had an underscore (``_``) in it. + * EasyInstall can now download bare ``.py`` files and wrap them in an egg, + as long as you include an ``#egg=name-version`` suffix on the URL, or if + the ``.py`` file is listed as the "Download URL" on the project's PyPI page. + This allows third parties to "package" trivial Python modules just by + linking to them (e.g. from within their own PyPI page or download links + page). + 0.6a9 * Fixed ``.pth`` file processing picking up nested eggs (i.e. 
ones inside Modified: sandbox/trunk/setuptools/setuptools/command/easy_install.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/easy_install.py (original) +++ sandbox/trunk/setuptools/setuptools/command/easy_install.py Tue Jan 10 05:00:54 2006 @@ -499,7 +499,7 @@ # Anything else, try to extract and build setup_base = tmpdir - if os.path.isfile(dist_filename): + if os.path.isfile(dist_filename) and not dist_filename.endswith('.py'): unpack_archive(dist_filename, tmpdir, self.unpack_progress) elif os.path.isdir(dist_filename): setup_base = os.path.abspath(dist_filename) Modified: sandbox/trunk/setuptools/setuptools/command/egg_info.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/egg_info.py (original) +++ sandbox/trunk/setuptools/setuptools/command/egg_info.py Tue Jan 10 05:00:54 2006 @@ -12,7 +12,7 @@ from distutils.util import convert_path from distutils.filelist import FileList from pkg_resources import parse_requirements, safe_name, parse_version, \ - safe_version, yield_lines, EntryPoint, iter_entry_points + safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename from sdist import walk_revctrl class egg_info(Command): @@ -58,7 +58,7 @@ self.egg_base = (dirs or {}).get('',os.curdir) self.ensure_dirname('egg_base') - self.egg_info = self.egg_name.replace('-','_')+'.egg-info' + self.egg_info = to_filename(self.egg_name)+'.egg-info' if self.egg_base != os.curdir: self.egg_info = os.path.join(self.egg_base, self.egg_info) if '-' in self.egg_name: self.check_broken_egg_info() Modified: sandbox/trunk/setuptools/setuptools/package_index.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/package_index.py (original) +++ sandbox/trunk/setuptools/setuptools/package_index.py Tue Jan 10 05:00:54 2006 @@ -1,6 +1,6 @@ """PyPI and 
direct package downloading""" -import sys, os.path, re, urlparse, urllib2 +import sys, os.path, re, urlparse, urllib2, shutil from pkg_resources import * from distutils import log from distutils.errors import DistutilsError @@ -39,12 +39,15 @@ return base,py_ver -def distros_for_url(url, metadata=None): - """Yield egg or source distribution objects that might be found at a URL""" - +def egg_info_for_url(url): scheme, server, path, parameters, query, fragment = urlparse.urlparse(url) base = urllib2.unquote(path.split('/')[-1]) if '#' in base: base, fragment = base.split('#',1) + return base,fragment + +def distros_for_url(url, metadata=None): + """Yield egg or source distribution objects that might be found at a URL""" + base, fragment = egg_info_for_url(url) dists = distros_for_location(url, base, metadata) if fragment and not dists: match = EGG_FRAGMENT.match(fragment) @@ -54,12 +57,10 @@ ) return dists - def distros_for_location(location, basename, metadata=None): """Yield egg or source distribution objects based on basename""" if basename.endswith('.egg.zip'): basename = basename[:-4] # strip the .zip - if basename.endswith('.egg'): # only one, unambiguous interpretation return [Distribution.from_location(location, basename, metadata)] @@ -76,7 +77,6 @@ if basename.endswith(ext): basename = basename[:-len(ext)] return interpret_distro_name(location, basename, metadata) - return [] # no extension matched @@ -205,7 +205,6 @@ def process_index(self,url,page): """Process the contents of a PyPI page""" - def scan(link): # Process a URL to see if it's for a package page if link.startswith(self.index_url): @@ -217,14 +216,15 @@ pkg = safe_name(parts[0]) ver = safe_version(parts[1]) self.package_pages.setdefault(pkg.lower(),{})[link] = True + return to_filename(pkg), to_filename(ver) + return None, None if url==self.index_url or 'Index of Packages' in page: # process an index page into the package-page index for match in HREF.finditer(page): scan( urlparse.urljoin(url, 
match.group(1)) ) - else: - scan(url) # ensure this page is in the page index + pkg,ver = scan(url) # ensure this page is in the page index # process individual package page for tag in ("Home Page", "Download URL"): pos = page.find(tag) @@ -232,35 +232,44 @@ match = HREF.search(page,pos) if match: # Process the found URL - self.scan_url(urlparse.urljoin(url, match.group(1))) - + new_url = urlparse.urljoin(url, match.group(1)) + base, frag = egg_info_for_url(new_url) + if base.endswith('.py') and not frag: + if pkg and ver: + new_url+='#egg=%s-%s' % (pkg,ver) + else: + self.need_version_info(url) + self.scan_url(new_url) return PYPI_MD5.sub( lambda m: '%s' % m.group(1,3,2), page ) + def need_version_info(self, url): + self.scan_all( + "Page at %s links to .py file(s) without version info; an index " + "scan is required.", url + ) - - - - + def scan_all(self, msg, *args): + if self.index_url not in self.fetched_urls: + if msg: self.warn(msg,*args) + self.warn( + "Scanning index of all packages (this may take a while)" + ) + self.scan_url(self.index_url) def find_packages(self, requirement): self.scan_url(self.index_url + requirement.unsafe_name+'/') if not self.package_pages.get(requirement.key): # Fall back to safe version of the name self.scan_url(self.index_url + requirement.project_name+'/') - if not self.package_pages.get(requirement.key): # We couldn't find the target package, so search the index page too self.warn( "Couldn't find index page for %r (maybe misspelled?)", requirement.unsafe_name ) - if self.index_url not in self.fetched_urls: - self.warn( - "Scanning index of all packages (this may take a while)" - ) - self.scan_url(self.index_url) + self.scan_all() for url in self.package_pages.get(requirement.key,()): # scan each page that might be related to the desired package @@ -274,6 +283,8 @@ self.debug("%s does not match %s", requirement, dist) return super(PackageIndex, self).obtain(requirement,installer) + + def check_md5(self, cs, info, filename, 
tfp): if re.match('md5=[0-9a-f]{32}$', info): self.debug("Validating md5 checksum for %s", filename) @@ -290,7 +301,10 @@ `spec` may be a ``Requirement`` object, or a string containing a URL, an existing local filename, or a project/version requirement spec - (i.e. the string form of a ``Requirement`` object). + (i.e. the string form of a ``Requirement`` object). If it is the URL + of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one + that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is + automatically created alongside the downloaded file. If `spec` is a ``Requirement`` object or a string containing a project/version requirement spec, this method is equivalent to @@ -304,8 +318,11 @@ scheme = URL_SCHEME(spec) if scheme: # It's a url, download it to tmpdir - return self._download_url(scheme.group(1), spec, tmpdir) - + found = self._download_url(scheme.group(1), spec, tmpdir) + base, fragment = egg_info_for_url(spec) + if base.endswith('.py'): + found = self.gen_setup(found,fragment,tmpdir) + return found elif os.path.exists(spec): # Existing file or directory, just return it return spec @@ -317,15 +334,9 @@ "Not a URL, existing file, or requirement spec: %r" % (spec,) ) - return self.fetch(spec, tmpdir) - - - - - def fetch(self, requirement, tmpdir, force_scan=False, source=False): """Obtain a file suitable for fulfilling `requirement` @@ -367,6 +378,47 @@ return dist + def gen_setup(self, filename, fragment, tmpdir): + match = EGG_FRAGMENT.match(fragment); #import pdb; pdb.set_trace() + dists = match and [d for d in + interpret_distro_name(filename, match.group(1), None) if d.version + ] or [] + + if len(dists)==1: # unambiguous ``#egg`` fragment + basename = os.path.basename(filename) + + # Make sure the file has been downloaded to the temp dir. 
+ if os.path.dirname(filename) != tmpdir: + dst = os.path.join(tmpdir, basename) + from setuptools.command.easy_install import samefile + if not samefile(filename, dst): + shutil.copy2(filename, dst) + filename=dst + + file = open(os.path.join(tmpdir, 'setup.py'), 'w') + file.write( + "from setuptools import setup\n" + "setup(name=%r, version=%r, py_modules=[%r])\n" + % ( + dists[0].project_name, dists[0].version, + os.path.splitext(basename)[0] + ) + ) + file.close() + return filename + + elif match: + raise DistutilsError( + "Can't unambiguously interpret project/version identifier %r; " + "any dashes in the name or version should be escaped using " + "underscores. %r" % (fragment,dists) + ) + else: + raise DistutilsError( + "Can't process plain .py files without an '#egg=name-version'" + " suffix to enable automatic setup script generation." + ) + dl_blocksize = 8192 def _download_to(self, url, filename): self.url_ok(url,True) # raises error if not allowed From python-checkins at python.org Tue Jan 10 07:03:15 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 10 Jan 2006 07:03:15 +0100 (CET) Subject: [Python-checkins] r41996 - in python/trunk: Misc/NEWS Objects/unicodeobject.c Message-ID: <20060110060315.205A71E4009@bag.python.org> Author: neal.norwitz Date: Tue Jan 10 07:03:13 2006 New Revision: 41996 Modified: python/trunk/Misc/NEWS python/trunk/Objects/unicodeobject.c Log: - Patch #1400181, fix unicode string formatting to not use the locale. This is how string objects work. u'%f' could use , instead of . for the decimal point. Now both strings and unicode always use periods. This is the code that would break: import locale locale.setlocale(locale.LC_NUMERIC, 'de_DE') u'%.1f' % 1.0 assert '1.0' == u'%.1f' % 1.0 I couldn't create a test case which fails, but this fixes the problem. Will backport. 
Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Tue Jan 10 07:03:13 2006 @@ -12,6 +12,10 @@ Core and builtins ----------------- +- Patch #1400181, fix unicode string formatting to not use the locale. + This is how string objects work. u'%f' could use , instead of . + for the decimal point. Now both strings and unicode always use periods. + - Bug #1244610, #1392915, fix build problem on OpenBSD 3.7 and 3.8. configure would break checking curses.h. Modified: python/trunk/Objects/unicodeobject.c ============================================================================== --- python/trunk/Objects/unicodeobject.c (original) +++ python/trunk/Objects/unicodeobject.c Tue Jan 10 07:03:13 2006 @@ -6579,26 +6579,31 @@ #define F_ALT (1<<3) #define F_ZERO (1<<4) -static -int usprintf(register Py_UNICODE *buffer, char *format, ...) +static int +strtounicode(Py_UNICODE *buffer, const char *charbuffer) { - register int i; - int len; - va_list va; - char *charbuffer; - va_start(va, format); - - /* First, format the string as char array, then expand to Py_UNICODE - array. */ - charbuffer = (char *)buffer; - len = vsprintf(charbuffer, format, va); + register long i; + long len = strlen(charbuffer); for (i = len - 1; i >= 0; i--) buffer[i] = (Py_UNICODE) charbuffer[i]; - va_end(va); return len; } +static int +doubletounicode(Py_UNICODE *buffer, size_t len, const char *format, double x) +{ + PyOS_ascii_formatd((char *)buffer, len, format, x); + return strtounicode(buffer, (char *)buffer); +} + +static int +longtounicode(Py_UNICODE *buffer, size_t len, const char *format, long x) +{ + PyOS_snprintf((char *)buffer, len, format, x); + return strtounicode(buffer, (char *)buffer); +} + /* XXX To save some code duplication, formatfloat/long/int could have been shared with stringobject.c, converting from 8-bit to Unicode after the formatting is done. 
*/ @@ -6648,7 +6653,7 @@ PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type); - return usprintf(buf, fmt, x); + return doubletounicode(buf, buflen, fmt, x); } static PyObject* @@ -6740,9 +6745,9 @@ prec, type); } if (sign[0]) - return usprintf(buf, fmt, -x); + return longtounicode(buf, buflen, fmt, -x); else - return usprintf(buf, fmt, x); + return longtounicode(buf, buflen, fmt, x); } static int From python-checkins at python.org Tue Jan 10 07:05:58 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 10 Jan 2006 07:05:58 +0100 (CET) Subject: [Python-checkins] r41997 - in python/branches/release24-maint: Misc/NEWS Objects/unicodeobject.c Message-ID: <20060110060558.8996E1E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 10 07:05:57 2006 New Revision: 41997 Modified: python/branches/release24-maint/Misc/NEWS python/branches/release24-maint/Objects/unicodeobject.c Log: Backport: - Patch #1400181, fix unicode string formatting to not use the locale. This is how string objects work. u'%f' could use , instead of . for the decimal point. Now both strings and unicode always use periods. This is the code that would break: import locale locale.setlocale(locale.LC_NUMERIC, 'de_DE') u'%.1f' % 1.0 assert '1.0' == u'%.1f' % 1.0 I couldn't create a test case which fails, but this fixes the problem. (tested in interpreter and reported fixed by others) Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Tue Jan 10 07:05:57 2006 @@ -12,6 +12,10 @@ Core and builtins ----------------- +- Patch #1400181, fix unicode string formatting to not use the locale. + This is how string objects work. u'%f' could use , instead of . + for the decimal point. Now both strings and unicode always use periods. 
+ - Bug #1244610, #1392915, fix build problem on OpenBSD 3.7 and 3.8. configure would break checking curses.h. Modified: python/branches/release24-maint/Objects/unicodeobject.c ============================================================================== --- python/branches/release24-maint/Objects/unicodeobject.c (original) +++ python/branches/release24-maint/Objects/unicodeobject.c Tue Jan 10 07:05:57 2006 @@ -6538,26 +6538,31 @@ #define F_ALT (1<<3) #define F_ZERO (1<<4) -static -int usprintf(register Py_UNICODE *buffer, char *format, ...) +static int +strtounicode(Py_UNICODE *buffer, const char *charbuffer) { - register int i; - int len; - va_list va; - char *charbuffer; - va_start(va, format); - - /* First, format the string as char array, then expand to Py_UNICODE - array. */ - charbuffer = (char *)buffer; - len = vsprintf(charbuffer, format, va); + register long i; + long len = strlen(charbuffer); for (i = len - 1; i >= 0; i--) buffer[i] = (Py_UNICODE) charbuffer[i]; - va_end(va); return len; } +static int +doubletounicode(Py_UNICODE *buffer, size_t len, const char *format, double x) +{ + PyOS_ascii_formatd((char *)buffer, len, format, x); + return strtounicode(buffer, (char *)buffer); +} + +static int +longtounicode(Py_UNICODE *buffer, size_t len, const char *format, long x) +{ + PyOS_snprintf((char *)buffer, len, format, x); + return strtounicode(buffer, (char *)buffer); +} + /* XXX To save some code duplication, formatfloat/long/int could have been shared with stringobject.c, converting from 8-bit to Unicode after the formatting is done. */ @@ -6607,7 +6612,7 @@ PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c", (flags&F_ALT) ? 
"#" : "", prec, type); - return usprintf(buf, fmt, x); + return doubletounicode(buf, buflen, fmt, x); } static PyObject* @@ -6699,9 +6704,9 @@ prec, type); } if (sign[0]) - return usprintf(buf, fmt, -x); + return longtounicode(buf, buflen, fmt, -x); else - return usprintf(buf, fmt, x); + return longtounicode(buf, buflen, fmt, x); } static int From python-checkins at python.org Tue Jan 10 08:05:46 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 10 Jan 2006 08:05:46 +0100 (CET) Subject: [Python-checkins] r41998 - in python/trunk: Lib/test/test_curses.py Misc/ACKS Misc/NEWS Modules/_cursesmodule.c Message-ID: <20060110070546.3C1241E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 10 08:05:44 2006 New Revision: 41998 Modified: python/trunk/Lib/test/test_curses.py python/trunk/Misc/ACKS python/trunk/Misc/NEWS python/trunk/Modules/_cursesmodule.c Log: SF bug #1400822, Extended version of _curses over{lay,write} does not work Fix signatures to conform to doc (also fixed ungetmouse()). Will backport. Modified: python/trunk/Lib/test/test_curses.py ============================================================================== --- python/trunk/Lib/test/test_curses.py (original) +++ python/trunk/Lib/test/test_curses.py Tue Jan 10 08:05:44 2006 @@ -5,7 +5,8 @@ # does call every method and function. 
# # Functions not tested: {def,reset}_{shell,prog}_mode, getch(), getstr(), -# getmouse(), ungetmouse(), init_color() +# init_color() +# Only called, not tested: getmouse(), ungetmouse() # import curses, sys, tempfile, os @@ -106,6 +107,8 @@ stdscr.notimeout(1) win2.overlay(win) win2.overwrite(win) + win2.overlay(win, 1, 2, 3, 3, 2, 1) + win2.overwrite(win, 1, 2, 3, 3, 2, 1) stdscr.redrawln(1,2) stdscr.scrollok(1) @@ -201,6 +204,9 @@ if hasattr(curses, 'getmouse'): curses.mousemask(curses.BUTTON1_PRESSED) curses.mouseinterval(10) + # just verify these don't cause errors + m = curses.getmouse() + curses.ungetmouse(*m) def unit_tests(): from curses import ascii Modified: python/trunk/Misc/ACKS ============================================================================== --- python/trunk/Misc/ACKS (original) +++ python/trunk/Misc/ACKS Tue Jan 10 08:05:44 2006 @@ -552,6 +552,7 @@ Nathan Paul Simons Janne Sinkkonen George Sipe +J. Sipprell Kragen Sitaker Christopher Smith Gregory P. Smith Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Tue Jan 10 08:05:44 2006 @@ -216,6 +216,10 @@ Extension Modules ----------------- +- Bug #1400822, _curses over{lay,write} doesn't work when passing 6 ints. + Also fix ungetmouse() which did not accept arguments properly. + The code now conforms to the documented signature. + - Bug #1400115, Fix segfault when calling curses.panel.userptr() without prior setting of the userptr. 
Modified: python/trunk/Modules/_cursesmodule.c ============================================================================== --- python/trunk/Modules/_cursesmodule.c (original) +++ python/trunk/Modules/_cursesmodule.c Tue Jan 10 08:05:44 2006 @@ -1163,7 +1163,7 @@ return NULL; break; case 7: - if (!PyArg_ParseTuple(args, "(O!iiiiii);window object, int, int, int, int, int, int", + if (!PyArg_ParseTuple(args, "O!iiiiii;window object, int, int, int, int, int, int", &PyCursesWindow_Type, &temp, &sminrow, &smincol, &dminrow, &dmincol, &dmaxrow, &dmaxcol)) return NULL; @@ -1201,7 +1201,7 @@ return NULL; break; case 7: - if (!PyArg_ParseTuple(args, "(O!iiiiii);window object, int, int, int, int, int, int", + if (!PyArg_ParseTuple(args, "O!iiiiii;window object, int, int, int, int, int, int", &PyCursesWindow_Type, &temp, &sminrow, &smincol, &dminrow, &dmincol, &dmaxrow, &dmaxcol)) return NULL; @@ -1674,7 +1674,7 @@ MEVENT event; PyCursesInitialised - if (!PyArg_ParseTuple(args, "(hiiil)", + if (!PyArg_ParseTuple(args, "hiiil", &event.id, &event.x, &event.y, &event.z, (int *) &event.bstate)) From python-checkins at python.org Tue Jan 10 08:08:08 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 10 Jan 2006 08:08:08 +0100 (CET) Subject: [Python-checkins] r41999 - in python/branches/release24-maint: Lib/test/test_curses.py Misc/ACKS Misc/NEWS Modules/_cursesmodule.c Message-ID: <20060110070808.1774A1E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 10 08:08:06 2006 New Revision: 41999 Modified: python/branches/release24-maint/Lib/test/test_curses.py python/branches/release24-maint/Misc/ACKS python/branches/release24-maint/Misc/NEWS python/branches/release24-maint/Modules/_cursesmodule.c Log: Backport: SF bug #1400822, Extended version of _curses over{lay,write} does not work Fix signatures to conform to doc (also fixed ungetmouse()). 
Modified: python/branches/release24-maint/Lib/test/test_curses.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_curses.py (original) +++ python/branches/release24-maint/Lib/test/test_curses.py Tue Jan 10 08:08:06 2006 @@ -5,7 +5,8 @@ # does call every method and function. # # Functions not tested: {def,reset}_{shell,prog}_mode, getch(), getstr(), -# getmouse(), ungetmouse(), init_color() +# init_color() +# Only called, not tested: getmouse(), ungetmouse() # import curses, sys, tempfile, os @@ -106,6 +107,8 @@ stdscr.notimeout(1) win2.overlay(win) win2.overwrite(win) + win2.overlay(win, 1, 2, 3, 3, 2, 1) + win2.overwrite(win, 1, 2, 3, 3, 2, 1) stdscr.redrawln(1,2) stdscr.scrollok(1) @@ -201,6 +204,9 @@ if hasattr(curses, 'getmouse'): curses.mousemask(curses.BUTTON1_PRESSED) curses.mouseinterval(10) + # just verify these don't cause errors + m = curses.getmouse() + curses.ungetmouse(*m) def unit_tests(): from curses import ascii Modified: python/branches/release24-maint/Misc/ACKS ============================================================================== --- python/branches/release24-maint/Misc/ACKS (original) +++ python/branches/release24-maint/Misc/ACKS Tue Jan 10 08:08:06 2006 @@ -543,6 +543,7 @@ Nathan Paul Simons Janne Sinkkonen George Sipe +J. Sipprell Kragen Sitaker Christopher Smith Gregory P. Smith Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Tue Jan 10 08:08:06 2006 @@ -200,6 +200,10 @@ Extension Modules ----------------- +- Bug #1400822, _curses over{lay,write} doesn't work when passing 6 ints. + Also fix ungetmouse() which did not accept arguments properly. + The code now conforms to the documented signature. 
+ - Bug #1400115, Fix segfault when calling curses.panel.userptr() without prior setting of the userptr. Modified: python/branches/release24-maint/Modules/_cursesmodule.c ============================================================================== --- python/branches/release24-maint/Modules/_cursesmodule.c (original) +++ python/branches/release24-maint/Modules/_cursesmodule.c Tue Jan 10 08:08:06 2006 @@ -1163,7 +1163,7 @@ return NULL; break; case 7: - if (!PyArg_ParseTuple(args, "(O!iiiiii);window object, int, int, int, int, int, int", + if (!PyArg_ParseTuple(args, "O!iiiiii;window object, int, int, int, int, int, int", &PyCursesWindow_Type, &temp, &sminrow, &smincol, &dminrow, &dmincol, &dmaxrow, &dmaxcol)) return NULL; @@ -1201,7 +1201,7 @@ return NULL; break; case 7: - if (!PyArg_ParseTuple(args, "(O!iiiiii);window object, int, int, int, int, int, int", + if (!PyArg_ParseTuple(args, "O!iiiiii;window object, int, int, int, int, int, int", &PyCursesWindow_Type, &temp, &sminrow, &smincol, &dminrow, &dmincol, &dmaxrow, &dmaxcol)) return NULL; @@ -1674,7 +1674,7 @@ MEVENT event; PyCursesInitialised - if (!PyArg_ParseTuple(args, "(hiiil)", + if (!PyArg_ParseTuple(args, "hiiil", &event.id, &event.x, &event.y, &event.z, (int *) &event.bstate)) From python-checkins at python.org Tue Jan 10 08:49:41 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 10 Jan 2006 08:49:41 +0100 (CET) Subject: [Python-checkins] r42000 - python/trunk/Lib/test/crashers python/trunk/Lib/test/crashers/README python/trunk/Lib/test/crashers/coerce.py python/trunk/Lib/test/crashers/weakref_in_del.py python/trunk/Lib/test/crashers/xml_parsers.py Message-ID: <20060110074941.80D491E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 10 08:49:41 2006 New Revision: 42000 Added: python/trunk/Lib/test/crashers/ python/trunk/Lib/test/crashers/README python/trunk/Lib/test/crashers/coerce.py python/trunk/Lib/test/crashers/weakref_in_del.py python/trunk/Lib/test/crashers/xml_parsers.py 
Log: As I threatened on python-dev, add a directory which contains all known bugs which cause the interpreter to crash. I'm sure we can find a few more. Many missing bugs deal with variations on unchecked infinite recursion (like coerce.py). Added: python/trunk/Lib/test/crashers/README ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/README Tue Jan 10 08:49:41 2006 @@ -0,0 +1,15 @@ +This directory only contains tests for outstanding bugs that cause +the interpreter to segfault. Ideally this directory should always +be empty. Sometimes it may not be easy to fix the underlying cause. + +Each test should fail when run from the command line: + + ./python Lib/test/crashers/weakref_in_del.py + +Each test should have a link to the bug report: + + # http://python.org/sf/BUG# + +Put as much info into a docstring or comments to help determine +the cause of the failure. Particularly note if the cause is +system or environment dependent and what the variables are. 
Added: python/trunk/Lib/test/crashers/coerce.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/coerce.py Tue Jan 10 08:49:41 2006 @@ -0,0 +1,9 @@ + +# http://python.org/sf/992017 + +class foo: + def __coerce__(self, other): + return other, self + +if __name__ == '__main__': + foo()+1 # segfault: infinite recursion in C Added: python/trunk/Lib/test/crashers/weakref_in_del.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/weakref_in_del.py Tue Jan 10 08:49:41 2006 @@ -0,0 +1,17 @@ +import weakref + +# http://python.org/sf/1377858 + +ref = None + +def test_weakref_in_del(): + class Target(object): + def __del__(self): + global ref + ref = weakref.ref(self) + + w = Target() + +if __name__ == '__main__': + test_weakref_in_del() + Added: python/trunk/Lib/test/crashers/xml_parsers.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/xml_parsers.py Tue Jan 10 08:49:41 2006 @@ -0,0 +1,56 @@ +from xml.parsers import expat + +# http://python.org/sf/1296433 + +def test_parse_only_xml_data(): + # + xml = "%s" % ('a' * 1025) + # this one doesn't crash + #xml = "%s" % ('a' * 10000) + + def handler(text): + raise Exception + + parser = expat.ParserCreate() + parser.CharacterDataHandler = handler + + try: + parser.Parse(xml) + except: + pass + +if __name__ == '__main__': + test_parse_only_xml_data() + +# Invalid read of size 4 +# at 0x43F936: PyObject_Free (obmalloc.c:735) +# by 0x45A7C7: unicode_dealloc (unicodeobject.c:246) +# by 0x1299021D: PyUnknownEncodingHandler (pyexpat.c:1314) +# by 0x12993A66: processXmlDecl (xmlparse.c:3330) +# by 0x12999211: doProlog (xmlparse.c:3678) +# by 0x1299C3F0: prologInitProcessor (xmlparse.c:3550) +# by 0x12991EA3: XML_ParseBuffer (xmlparse.c:1562) +# by 0x1298F8EC: xmlparse_Parse (pyexpat.c:895) +# 
by 0x47B3A1: PyEval_EvalFrameEx (ceval.c:3565) +# by 0x47CCAC: PyEval_EvalCodeEx (ceval.c:2739) +# by 0x47CDE1: PyEval_EvalCode (ceval.c:490) +# by 0x499820: PyRun_SimpleFileExFlags (pythonrun.c:1198) +# by 0x4117F1: Py_Main (main.c:492) +# by 0x12476D1F: __libc_start_main (in /lib/libc-2.3.5.so) +# by 0x410DC9: (within /home/neal/build/python/svn/clean/python) +# Address 0x12704020 is 264 bytes inside a block of size 592 free'd +# at 0x11B1BA8A: free (vg_replace_malloc.c:235) +# by 0x124B5F18: (within /lib/libc-2.3.5.so) +# by 0x48DE43: find_module (import.c:1320) +# by 0x48E997: import_submodule (import.c:2249) +# by 0x48EC15: load_next (import.c:2083) +# by 0x48F091: import_module_ex (import.c:1914) +# by 0x48F385: PyImport_ImportModuleEx (import.c:1955) +# by 0x46D070: builtin___import__ (bltinmodule.c:44) +# by 0x4186CF: PyObject_Call (abstract.c:1777) +# by 0x474E9B: PyEval_CallObjectWithKeywords (ceval.c:3432) +# by 0x47928E: PyEval_EvalFrameEx (ceval.c:2038) +# by 0x47CCAC: PyEval_EvalCodeEx (ceval.c:2739) +# by 0x47CDE1: PyEval_EvalCode (ceval.c:490) +# by 0x48D0F7: PyImport_ExecCodeModuleEx (import.c:635) +# by 0x48D4F4: load_source_module (import.c:913) From mal at egenix.com Tue Jan 10 10:21:22 2006 From: mal at egenix.com (M.-A. Lemburg) Date: Tue, 10 Jan 2006 10:21:22 +0100 Subject: [Python-checkins] r41971 - in python/branches/ssize_t: Include/abstract.hInclude/stringobject.hObjects/classobject.c Objects/enumobject.cObjects/iterobject.cObjects/setobject.c Objects/typeobject.c Objects/weakrefobject.c Python/pystrtod.c In-Reply-To: <43C2DD44.2040908@v.loewis.de> References: <20060108054820.15F321E4002@bag.python.org> <43C25CE1.4070106@egenix.com> <43C2DD44.2040908@v.loewis.de> Message-ID: <43C37C92.3060302@egenix.com> Martin v. Löwis wrote: > M.-A. Lemburg wrote: >> Changes like these (variables passed in by reference) should be >> handle more carefully and also discussed on python-dev.
> > The change was discussed on python-dev; it is part of the PEP > (which still awaits a number). > > Notice that the change is in the ssize_t-branch only so far. I know that it's only in the branch, but the branch will eventually get merged into the trunk. I saw the short discussion on python-dev about the PyArg_ParseTuple() problems, but it did not touch the issue of other output parameter API changes. I've posted such a message now, to see what others think. >> The reason is that these changes will introduce major changes >> in extensions using these APIs. > > I don't believe the change is major. It only affects a few extensions, > and for these, it is only a minor change. A single line of changing > will be enough. This is true for all the changes related to parameters passed by value. It is not true for output parameters. For these, the change will propagate into the extension and make quite a few changes necessary (for the same reason you have to change so many variables to Py_ssize_t). We should make it possible to have a simple recompile of old extensions continue to work with Python 2.5 (albeit without them supporting 64-bit indexes) and without introducing segfaults or buffer overflows that way. -- Marc-Andre Lemburg eGenix.com Professional Python Services directly from the Source (#1, Jan 10 2006) >>> Python/Zope Consulting and Support ... http://www.egenix.com/ >>> mxODBC.Zope.Database.Adapter ... http://zope.egenix.com/ >>> mxODBC, mxDateTime, mxTextTools ... http://python.egenix.com/ ________________________________________________________________________ ::: Try mxODBC.Zope.DA for Windows,Linux,Solaris,FreeBSD for free ! 
:::: From python-checkins at python.org Tue Jan 10 14:39:32 2006 From: python-checkins at python.org (andrew.kuchling) Date: Tue, 10 Jan 2006 14:39:32 +0100 (CET) Subject: [Python-checkins] r42001 - sandbox/trunk/pycon/get.sh Message-ID: <20060110133932.D56621E4002@bag.python.org> Author: andrew.kuchling Date: Tue Jan 10 14:39:32 2006 New Revision: 42001 Modified: sandbox/trunk/pycon/get.sh (props changed) Log: Make executable From python-checkins at python.org Tue Jan 10 14:50:01 2006 From: python-checkins at python.org (andrew.kuchling) Date: Tue, 10 Jan 2006 14:50:01 +0100 (CET) Subject: [Python-checkins] r42002 - sandbox/trunk/pycon/talks.py Message-ID: <20060110135001.B31501E4002@bag.python.org> Author: andrew.kuchling Date: Tue Jan 10 14:50:01 2006 New Revision: 42002 Modified: sandbox/trunk/pycon/talks.py Log: Rename two talks to avoid confusion Modified: sandbox/trunk/pycon/talks.py ============================================================================== --- sandbox/trunk/pycon/talks.py (original) +++ sandbox/trunk/pycon/talks.py Tue Jan 10 14:50:01 2006 @@ -17,7 +17,7 @@ 21: 'Developing an Internationalized Application in Python: Chandler a case study', 23: 'Processing XML with ElementTree', 24: 'What is Nabu?', - 25: 'TurboGears Tutorial', + 25: 'TurboGears How-To', 26: 'Packaging Programs with py2exe', 29: 'Python in Business : Thyme, a business-oriented Python development framework.', 30: 'Python at Home : In Control', @@ -45,7 +45,7 @@ 59: 'Introduction to CMF Application Development', 60: 'Mission-Critical Python and the Brave New Web', 62: 'Docutils Developers Tutorial: Architecture, Extending, and Embedding', - 63: 'Django tutorial', + 63: 'Django How-To', 64: 'Creating Presentations With Docutils and S5', 65: 'Understanding Unicode', 66: 'Building Pluggable Software with Eggs', From python-checkins at python.org Tue Jan 10 20:29:28 2006 From: python-checkins at python.org (georg.brandl) Date: Tue, 10 Jan 2006 20:29:28 +0100 (CET) Subject: 
[Python-checkins] r42003 - python/trunk/Lib/test/outstanding_bugs.py python/trunk/Lib/test/outstanding_crashes.py Message-ID: <20060110192928.F114B1E4002@bag.python.org> Author: georg.brandl Date: Tue Jan 10 20:29:24 2006 New Revision: 42003 Added: python/trunk/Lib/test/outstanding_crashes.py Modified: python/trunk/Lib/test/outstanding_bugs.py Log: Add outstanding_crashes.py with tests for crashes. Modified: python/trunk/Lib/test/outstanding_bugs.py ============================================================================== --- python/trunk/Lib/test/outstanding_bugs.py (original) +++ python/trunk/Lib/test/outstanding_bugs.py Tue Jan 10 20:29:24 2006 @@ -22,3 +22,6 @@ def test_main(): test_support.run_unittest(TestBug1385040) + +if __name__ == "__main__": + test_main() Added: python/trunk/Lib/test/outstanding_crashes.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/outstanding_crashes.py Tue Jan 10 20:29:24 2006 @@ -0,0 +1,38 @@ +# +# This file is for everybody to add tests for crashes that aren't +# fixed yet. Please add a test case and appropriate description. +# +# When you fix one of the crashes, please move the test to the correct +# test_ module. +# + +import unittest +from test import test_support + + +# Bug 1377858 +# +# mwh's description: +# The problem is obvious if you read typeobject.c around line 660: the weakref +# list is cleared before __del__ is called, so any weakrefs added during the +# execution of __del__ are never informed of the object's death. 
+ +import weakref +ref = None + +class TestBug1377858(unittest.TestCase): + class Target(object): + def __del__(self): + global ref + ref = weakref.ref(self) + + def testBug1377858(self): + w = self.__class__.Target() + w = None + print ref() + +def test_main(): + test_support.run_unittest(TestBug1377858) + +if __name__ == "__main__": + test_main() From python-checkins at python.org Tue Jan 10 21:07:14 2006 From: python-checkins at python.org (georg.brandl) Date: Tue, 10 Jan 2006 21:07:14 +0100 (CET) Subject: [Python-checkins] r42004 - python/trunk/Lib/test/outstanding_crashes.py Message-ID: <20060110200714.1608A1E4002@bag.python.org> Author: georg.brandl Date: Tue Jan 10 21:07:13 2006 New Revision: 42004 Removed: python/trunk/Lib/test/outstanding_crashes.py Log: Remove outstanding_crashes again. Deleted: /python/trunk/Lib/test/outstanding_crashes.py ============================================================================== --- /python/trunk/Lib/test/outstanding_crashes.py Tue Jan 10 21:07:13 2006 +++ (empty file) @@ -1,38 +0,0 @@ -# -# This file is for everybody to add tests for crashes that aren't -# fixed yet. Please add a test case and appropriate description. -# -# When you fix one of the crashes, please move the test to the correct -# test_ module. -# - -import unittest -from test import test_support - - -# Bug 1377858 -# -# mwh's description: -# The problem is obvious if you read typeobject.c around line 660: the weakref -# list is cleared before __del__ is called, so any weakrefs added during the -# execution of __del__ are never informed of the object's death. 
- -import weakref -ref = None - -class TestBug1377858(unittest.TestCase): - class Target(object): - def __del__(self): - global ref - ref = weakref.ref(self) - - def testBug1377858(self): - w = self.__class__.Target() - w = None - print ref() - -def test_main(): - test_support.run_unittest(TestBug1377858) - -if __name__ == "__main__": - test_main() From python-checkins at python.org Tue Jan 10 22:21:39 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 10 Jan 2006 22:21:39 +0100 (CET) Subject: [Python-checkins] r42006 - peps/trunk/pep-0353.txt Message-ID: <20060110212139.A97021E4002@bag.python.org> Author: martin.v.loewis Date: Tue Jan 10 22:21:39 2006 New Revision: 42006 Modified: peps/trunk/pep-0353.txt Log: Add open issues section. Modified: peps/trunk/pep-0353.txt ============================================================================== --- peps/trunk/pep-0353.txt (original) +++ peps/trunk/pep-0353.txt Tue Jan 10 22:21:39 2006 @@ -229,6 +229,18 @@ type on most 64-bit systems (except Win64), so the compiler inserts padding before it as well. +Open Issues +=========== + +* Marc-Andre Lemburg commented that complete backwards + compatibility with existing source code should be + preserved. In particular, functions that have + Py_ssize_t* output arguments should continue to run + correctly even if the callers pass int*. + + It is not clear what strategy could be used to implement + that requirement. 
+ Copyright ========= From python-checkins at python.org Tue Jan 10 22:37:28 2006 From: python-checkins at python.org (georg.brandl) Date: Tue, 10 Jan 2006 22:37:28 +0100 (CET) Subject: [Python-checkins] r42007 - python/trunk/Doc/lib/xmlsaxhandler.tex Message-ID: <20060110213728.9F7721E4002@bag.python.org> Author: georg.brandl Date: Tue Jan 10 22:37:26 2006 New Revision: 42007 Modified: python/trunk/Doc/lib/xmlsaxhandler.tex Log: Bug #1397205: doc typo Modified: python/trunk/Doc/lib/xmlsaxhandler.tex ============================================================================== --- python/trunk/Doc/lib/xmlsaxhandler.tex (original) +++ python/trunk/Doc/lib/xmlsaxhandler.tex Tue Jan 10 22:37:26 2006 @@ -14,7 +14,7 @@ only need to implement those interfaces whose events they are interested in; they can implement the interfaces in a single object or in multiple objects. Handler implementations should inherit from the -base classes provided in the module \module{xml.sax}, so that all +base classes provided in the module \module{xml.sax.handler}, so that all methods get default implementations. 
\begin{classdesc*}{ContentHandler} From python-checkins at python.org Tue Jan 10 22:37:45 2006 From: python-checkins at python.org (georg.brandl) Date: Tue, 10 Jan 2006 22:37:45 +0100 (CET) Subject: [Python-checkins] r42008 - python/branches/release24-maint/Doc/lib/xmlsaxhandler.tex Message-ID: <20060110213745.49A801E4002@bag.python.org> Author: georg.brandl Date: Tue Jan 10 22:37:44 2006 New Revision: 42008 Modified: python/branches/release24-maint/Doc/lib/xmlsaxhandler.tex Log: Bug #1397205: doc typo Modified: python/branches/release24-maint/Doc/lib/xmlsaxhandler.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/xmlsaxhandler.tex (original) +++ python/branches/release24-maint/Doc/lib/xmlsaxhandler.tex Tue Jan 10 22:37:44 2006 @@ -14,7 +14,7 @@ only need to implement those interfaces whose events they are interested in; they can implement the interfaces in a single object or in multiple objects. Handler implementations should inherit from the -base classes provided in the module \module{xml.sax}, so that all +base classes provided in the module \module{xml.sax.handler}, so that all methods get default implementations. 
\begin{classdesc*}{ContentHandler} From python-checkins at python.org Wed Jan 11 01:14:30 2006 From: python-checkins at python.org (fredrik.lundh) Date: Wed, 11 Jan 2006 01:14:30 +0100 (CET) Subject: [Python-checkins] r42009 - python/trunk/Doc/lib/libmodulefinder.tex python/trunk/Doc/lib/libpickletools.tex python/trunk/Doc/lib/libstringprep.tex Message-ID: <20060111001430.7206C1E4002@bag.python.org> Author: fredrik.lundh Date: Wed Jan 11 01:14:29 2006 New Revision: 42009 Modified: python/trunk/Doc/lib/libmodulefinder.tex python/trunk/Doc/lib/libpickletools.tex python/trunk/Doc/lib/libstringprep.tex Log: added a few missing \versionadded{2.3} tags Modified: python/trunk/Doc/lib/libmodulefinder.tex ============================================================================== --- python/trunk/Doc/lib/libmodulefinder.tex (original) +++ python/trunk/Doc/lib/libmodulefinder.tex Wed Jan 11 01:14:29 2006 @@ -5,6 +5,8 @@ \declaremodule{standard}{modulefinder} \modulesynopsis{Find modules used by a script.} +\versionadded{2.3} + This module provides a \class{ModuleFinder} class that can be used to determine the set of modules imported by a script. 
\code{modulefinder.py} can also be run as a script, giving the Modified: python/trunk/Doc/lib/libpickletools.tex ============================================================================== --- python/trunk/Doc/lib/libpickletools.tex (original) +++ python/trunk/Doc/lib/libpickletools.tex Wed Jan 11 01:14:29 2006 @@ -3,6 +3,8 @@ \declaremodule{standard}{pickletools} \modulesynopsis{Contains extensive comments about the pickle protocols and pickle-machine opcodes, as well as some useful functions.} +\versionadded{2.3} + This module contains various constants relating to the intimate details of the \refmodule{pickle} module, some lengthy comments about the implementation, and a few useful functions for analyzing pickled Modified: python/trunk/Doc/lib/libstringprep.tex ============================================================================== --- python/trunk/Doc/lib/libstringprep.tex (original) +++ python/trunk/Doc/lib/libstringprep.tex Wed Jan 11 01:14:29 2006 @@ -6,6 +6,8 @@ \moduleauthor{Martin v. L\"owis}{martin at v.loewis.de} \sectionauthor{Martin v. L\"owis}{martin at v.loewis.de} +\versionadded{2.3} + When identifying things (such as host names) in the internet, it is often necessary to compare such identifications for ``equality''. 
Exactly how this comparison is executed may depend on From python-checkins at python.org Wed Jan 11 01:18:44 2006 From: python-checkins at python.org (fredrik.lundh) Date: Wed, 11 Jan 2006 01:18:44 +0100 (CET) Subject: [Python-checkins] r42010 - python/trunk/Doc/lib/libsimplexmlrpc.tex Message-ID: <20060111001844.D01101E4002@bag.python.org> Author: fredrik.lundh Date: Wed Jan 11 01:18:43 2006 New Revision: 42010 Modified: python/trunk/Doc/lib/libsimplexmlrpc.tex Log: added a missing +\versionadded{2.2} tag Modified: python/trunk/Doc/lib/libsimplexmlrpc.tex ============================================================================== --- python/trunk/Doc/lib/libsimplexmlrpc.tex (original) +++ python/trunk/Doc/lib/libsimplexmlrpc.tex Wed Jan 11 01:18:43 2006 @@ -6,6 +6,7 @@ \moduleauthor{Brian Quinlan}{brianq at activestate.com} \sectionauthor{Fred L. Drake, Jr.}{fdrake at acm.org} +\versionadded{2.2} The \module{SimpleXMLRPCServer} module provides a basic server framework for XML-RPC servers written in Python. 
Servers can either From python-checkins at python.org Wed Jan 11 08:21:19 2006 From: python-checkins at python.org (neal.norwitz) Date: Wed, 11 Jan 2006 08:21:19 +0100 (CET) Subject: [Python-checkins] r42011 - python/trunk/Lib/test/crashers/recursive_call.py Message-ID: <20060111072119.84AB11E403A@bag.python.org> Author: neal.norwitz Date: Wed Jan 11 08:21:19 2006 New Revision: 42011 Added: python/trunk/Lib/test/crashers/recursive_call.py Log: add another crash reported by Thomas Wouters Added: python/trunk/Lib/test/crashers/recursive_call.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/recursive_call.py Wed Jan 11 08:21:19 2006 @@ -0,0 +1,10 @@ +#!/usr/bin/env python + +# No bug report AFAIK, mail on python-dev on 2006-01-10 +import sys + +sys.setrecursionlimit(1 << 30) +f = lambda f:f(f) + +if __name__ == '__main__': + f(f) From python-checkins at python.org Wed Jan 11 09:54:49 2006 From: python-checkins at python.org (neal.norwitz) Date: Wed, 11 Jan 2006 09:54:49 +0100 (CET) Subject: [Python-checkins] r42012 - in python/trunk: Lib/test/output/test_mmap Lib/test/test_mmap.py Misc/ACKS Misc/NEWS Modules/mmapmodule.c Message-ID: <20060111085449.B51FE1E400F@bag.python.org> Author: neal.norwitz Date: Wed Jan 11 09:54:45 2006 New Revision: 42012 Modified: python/trunk/Lib/test/output/test_mmap python/trunk/Lib/test/test_mmap.py python/trunk/Misc/ACKS python/trunk/Misc/NEWS python/trunk/Modules/mmapmodule.c Log: Fix SF bug #1402308, segfault when using mmap(-1, ...) This didn't crash on Linux, but valgrind complained. I'm not sure if this test is valid on Windows. Will backport. Modified: python/trunk/Lib/test/output/test_mmap ============================================================================== --- python/trunk/Lib/test/output/test_mmap (original) +++ python/trunk/Lib/test/output/test_mmap Wed Jan 11 09:54:45 2006 @@ -31,6 +31,7 @@ Modifying copy-on-write memory map. 
Ensuring copy-on-write maps cannot be resized. Ensuring invalid access parameter raises exception. + Try opening a bad file descriptor... Ensuring that passing 0 as map length sets map size to current file size. Ensuring that passing 0 as map length sets map size to current file size. Test passed Modified: python/trunk/Lib/test/test_mmap.py ============================================================================== --- python/trunk/Lib/test/test_mmap.py (original) +++ python/trunk/Lib/test/test_mmap.py Wed Jan 11 09:54:45 2006 @@ -281,6 +281,14 @@ except OSError: pass + print ' Try opening a bad file descriptor...' + try: + mmap.mmap(-1, 4096) + except mmap.error: + pass + else: + verify(0, 'expected a mmap.error but did not get it') + # Do a tougher .find() test. SF bug 515943 pointed out that, in 2.2, # searching for data with embedded \0 bytes didn't work. f = open(TESTFN, 'w+') Modified: python/trunk/Misc/ACKS ============================================================================== --- python/trunk/Misc/ACKS (original) +++ python/trunk/Misc/ACKS Wed Jan 11 09:54:45 2006 @@ -529,6 +529,7 @@ Neil Schemenauer David Scherer Gregor Schmid +Ralf Schmitt Peter Schneider-Kamp Sam Schulenburg Stefan Schwarzer Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Wed Jan 11 09:54:45 2006 @@ -216,6 +216,8 @@ Extension Modules ----------------- +- Bug #1402308, (possible) segfault when using mmap.mmap(-1, ...) + - Bug #1400822, _curses over{lay,write} doesn't work when passing 6 ints. Also fix ungetmouse() which did not accept arguments properly. The code now conforms to the documented signature. 
Modified: python/trunk/Modules/mmapmodule.c ============================================================================== --- python/trunk/Modules/mmapmodule.c (original) +++ python/trunk/Modules/mmapmodule.c Wed Jan 11 09:54:45 2006 @@ -918,6 +918,7 @@ #endif m_obj = PyObject_New (mmap_object, &mmap_object_type); if (m_obj == NULL) {return NULL;} + m_obj->data = NULL; m_obj->size = (size_t) map_size; m_obj->pos = (size_t) 0; m_obj->fd = dup(fd); From g.brandl-nospam at gmx.net Wed Jan 11 19:10:06 2006 From: g.brandl-nospam at gmx.net (Georg Brandl) Date: Wed, 11 Jan 2006 19:10:06 +0100 Subject: [Python-checkins] r42012 - in python/trunk: Lib/test/output/test_mmap Lib/test/test_mmap.py Misc/ACKS Misc/NEWS Modules/mmapmodule.c In-Reply-To: <20060111085449.B51FE1E400F@bag.python.org> References: <20060111085449.B51FE1E400F@bag.python.org> Message-ID: neal.norwitz wrote: > Author: neal.norwitz > Date: Wed Jan 11 09:54:45 2006 > New Revision: 42012 > > Modified: > python/trunk/Lib/test/output/test_mmap > python/trunk/Lib/test/test_mmap.py > python/trunk/Misc/ACKS > python/trunk/Misc/NEWS > python/trunk/Modules/mmapmodule.c > Log: > Fix SF bug #1402308, segfault when using mmap(-1, ...) > > This didn't crash on Linux, but valgrind complained. > I'm not sure if this test is valid on Windows. You didn't fix the other problem, that dup() mustn't be called when the fd is -1, cause it errors out then. So either we check for -1 or for MAP_ANON. Georg From nnorwitz at gmail.com Wed Jan 11 20:41:50 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Wed, 11 Jan 2006 11:41:50 -0800 Subject: [Python-checkins] r42012 - in python/trunk: Lib/test/output/test_mmap Lib/test/test_mmap.py Misc/ACKS Misc/NEWS Modules/mmapmodule.c In-Reply-To: References: <20060111085449.B51FE1E400F@bag.python.org> Message-ID: On 1/11/06, Georg Brandl wrote: > > You didn't fix the other problem, that dup() mustn't be called > when the fd is -1, cause it errors out then. 
> > So either we check for -1 or for MAP_ANON. Right, that seems like a feature enhancement. If you pass -1 now, an exception is raised rather than a crash. I didn't see anything in the docs which say we support -1 for anon maps. I mentioned it in the bug report that it would be better to open a new patch with the code to add this feature for both unix and windows (not sure how Windows works) and also update the doc. I should have mentioned that he should add a test too. n From python-checkins at python.org Thu Jan 12 04:30:12 2006 From: python-checkins at python.org (david.goodger) Date: Thu, 12 Jan 2006 04:30:12 +0100 (CET) Subject: [Python-checkins] r42013 - peps/trunk Message-ID: <20060112033012.F2F261E4002@bag.python.org> Author: david.goodger Date: Thu Jan 12 04:30:11 2006 New Revision: 42013 Modified: peps/trunk/ (props changed) Log: fix props From python-checkins at python.org Thu Jan 12 04:30:26 2006 From: python-checkins at python.org (david.goodger) Date: Thu, 12 Jan 2006 04:30:26 +0100 (CET) Subject: [Python-checkins] r42014 - peps/trunk/docutils Message-ID: <20060112033026.39CB11E4002@bag.python.org> Author: david.goodger Date: Thu Jan 12 04:30:25 2006 New Revision: 42014 Removed: peps/trunk/docutils/ Log: delete outdated copy of Docutils From python-checkins at python.org Thu Jan 12 04:33:16 2006 From: python-checkins at python.org (david.goodger) Date: Thu, 12 Jan 2006 04:33:16 +0100 (CET) Subject: [Python-checkins] r42015 - peps/trunk Message-ID: <20060112033316.E24C31E4002@bag.python.org> Author: david.goodger Date: Thu Jan 12 04:33:16 2006 New Revision: 42015 Modified: peps/trunk/ (props changed) Log: add external link to Docutils public repo -- always up-to-date From python-checkins at python.org Thu Jan 12 04:38:08 2006 From: python-checkins at python.org (david.goodger) Date: Thu, 12 Jan 2006 04:38:08 +0100 (CET) Subject: [Python-checkins] r42016 - peps/trunk/roman.py Message-ID: <20060112033808.A63821E4002@bag.python.org> Author: david.goodger 
Date: Thu Jan 12 04:38:08 2006 New Revision: 42016 Added: peps/trunk/roman.py (contents, props changed) Log: add 3rd-party module required by Docutils Added: peps/trunk/roman.py ============================================================================== --- (empty file) +++ peps/trunk/roman.py Thu Jan 12 04:38:08 2006 @@ -0,0 +1,81 @@ +"""Convert to and from Roman numerals""" + +__author__ = "Mark Pilgrim (f8dy at diveintopython.org)" +__version__ = "1.4" +__date__ = "8 August 2001" +__copyright__ = """Copyright (c) 2001 Mark Pilgrim + +This program is part of "Dive Into Python", a free Python tutorial for +experienced programmers. Visit http://diveintopython.org/ for the +latest version. + +This program is free software; you can redistribute it and/or modify +it under the terms of the Python 2.1.1 license, available at +http://www.python.org/2.1.1/license.html +""" + +import re + +#Define exceptions +class RomanError(Exception): pass +class OutOfRangeError(RomanError): pass +class NotIntegerError(RomanError): pass +class InvalidRomanNumeralError(RomanError): pass + +#Define digit mapping +romanNumeralMap = (('M', 1000), + ('CM', 900), + ('D', 500), + ('CD', 400), + ('C', 100), + ('XC', 90), + ('L', 50), + ('XL', 40), + ('X', 10), + ('IX', 9), + ('V', 5), + ('IV', 4), + ('I', 1)) + +def toRoman(n): + """convert integer to Roman numeral""" + if not (0 < n < 5000): + raise OutOfRangeError, "number out of range (must be 1..4999)" + if int(n) <> n: + raise NotIntegerError, "decimals can not be converted" + + result = "" + for numeral, integer in romanNumeralMap: + while n >= integer: + result += numeral + n -= integer + return result + +#Define pattern to detect valid Roman numerals +romanNumeralPattern = re.compile(""" + ^ # beginning of string + M{0,4} # thousands - 0 to 4 M's + (CM|CD|D?C{0,3}) # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 C's), + # or 500-800 (D, followed by 0 to 3 C's) + (XC|XL|L?X{0,3}) # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 X's), + # or 
50-80 (L, followed by 0 to 3 X's) + (IX|IV|V?I{0,3}) # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 I's), + # or 5-8 (V, followed by 0 to 3 I's) + $ # end of string + """ ,re.VERBOSE) + +def fromRoman(s): + """convert Roman numeral to integer""" + if not s: + raise InvalidRomanNumeralError, 'Input can not be blank' + if not romanNumeralPattern.search(s): + raise InvalidRomanNumeralError, 'Invalid Roman numeral: %s' % s + + result = 0 + index = 0 + for numeral, integer in romanNumeralMap: + while s[index:index+len(numeral)] == numeral: + result += integer + index += len(numeral) + return result + From python-checkins at python.org Thu Jan 12 04:38:20 2006 From: python-checkins at python.org (david.goodger) Date: Thu, 12 Jan 2006 04:38:20 +0100 (CET) Subject: [Python-checkins] r42017 - peps/trunk/docutils.conf Message-ID: <20060112033820.141771E4002@bag.python.org> Author: david.goodger Date: Thu Jan 12 04:38:19 2006 New Revision: 42017 Modified: peps/trunk/docutils.conf Log: update config Modified: peps/trunk/docutils.conf ============================================================================== --- peps/trunk/docutils.conf (original) +++ peps/trunk/docutils.conf Thu Jan 12 04:38:19 2006 @@ -1,16 +1,11 @@ # Configuration file for Docutils. 
# See http://docutils.sf.net/docs/tools.html -[options] - +[general] # These entries are for the page footer: source-link: 1 datestamp: %Y-%m-%d %H:%M UTC generator: 1 -# reStructuredText-style PEP setup: -pep-template: pep-html-template -pep-stylesheet: pep.css - -# Standalone HTML: -stylesheet: ../css/docutils.css +# link to the stylesheet; don't embed it +embed-stylesheet: 0 From python-checkins at python.org Thu Jan 12 04:42:21 2006 From: python-checkins at python.org (david.goodger) Date: Thu, 12 Jan 2006 04:42:21 +0100 (CET) Subject: [Python-checkins] r42018 - peps/trunk/docutils.conf peps/trunk/pep.css Message-ID: <20060112034221.1D5181E4002@bag.python.org> Author: david.goodger Date: Thu Jan 12 04:42:20 2006 New Revision: 42018 Modified: peps/trunk/docutils.conf peps/trunk/pep.css Log: updated stylesheet & config file Modified: peps/trunk/docutils.conf ============================================================================== --- peps/trunk/docutils.conf (original) +++ peps/trunk/docutils.conf Thu Jan 12 04:42:20 2006 @@ -7,5 +7,8 @@ datestamp: %Y-%m-%d %H:%M UTC generator: 1 +# use the local stylesheet +stylesheet: pep.css + # link to the stylesheet; don't embed it embed-stylesheet: 0 Modified: peps/trunk/pep.css ============================================================================== --- peps/trunk/pep.css (original) +++ peps/trunk/pep.css Thu Jan 12 04:42:20 2006 @@ -125,7 +125,8 @@ font-style: normal } div.figure { - margin-left: 2em } + margin-left: 2em ; + margin-right: 2em } div.footer, div.header { clear: both; @@ -274,10 +275,6 @@ font-family: serif ; font-size: 100% } -pre.line-block { - font-family: serif ; - font-size: 100% } - pre.literal-block, pre.doctest-block { margin-left: 2em ; margin-right: 2em ; @@ -311,7 +308,8 @@ font-size: 80% } table.citation { - border-left: solid thin gray } + border-left: solid 1px gray; + margin-left: 1px } table.docinfo { margin: 2em 4em } @@ -321,7 +319,8 @@ margin-bottom: 0.5em } table.footnote { - 
border-left: solid thin black } + border-left: solid 1px black; + margin-left: 1px } table.docutils td, table.docutils th, table.docinfo td, table.docinfo th { From python-checkins at python.org Thu Jan 12 16:41:05 2006 From: python-checkins at python.org (georg.brandl) Date: Thu, 12 Jan 2006 16:41:05 +0100 (CET) Subject: [Python-checkins] r42019 - python/trunk/Lib/test/test_curses.py Message-ID: <20060112154105.87CC11E4346@bag.python.org> Author: georg.brandl Date: Thu Jan 12 16:41:05 2006 New Revision: 42019 Modified: python/trunk/Lib/test/test_curses.py Log: Test curses.setupterm() before initscr(). Modified: python/trunk/Lib/test/test_curses.py ============================================================================== --- python/trunk/Lib/test/test_curses.py (original) +++ python/trunk/Lib/test/test_curses.py Thu Jan 12 16:41:05 2006 @@ -173,7 +173,6 @@ curses.qiflush() curses.raw() ; curses.raw(1) curses.setsyx(5,5) - curses.setupterm(fd=sys.__stdout__.fileno()) curses.tigetflag('hc') curses.tigetnum('co') curses.tigetstr('cr') @@ -239,12 +238,14 @@ finally: curses.resetty() - if __name__ == '__main__': curses.wrapper(main) unit_tests() else: try: + # testing setupterm() inside initscr/endwin + # causes terminal breakage + curses.setupterm(fd=sys.__stdout__.fileno()) stdscr = curses.initscr() main(stdscr) finally: From python-checkins at python.org Thu Jan 12 16:41:15 2006 From: python-checkins at python.org (georg.brandl) Date: Thu, 12 Jan 2006 16:41:15 +0100 (CET) Subject: [Python-checkins] r42020 - python/branches/release24-maint/Lib/test/test_curses.py Message-ID: <20060112154115.F327E1E43ED@bag.python.org> Author: georg.brandl Date: Thu Jan 12 16:41:15 2006 New Revision: 42020 Modified: python/branches/release24-maint/Lib/test/test_curses.py Log: Test curses.setupterm() before initscr(). 
Modified: python/branches/release24-maint/Lib/test/test_curses.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_curses.py (original) +++ python/branches/release24-maint/Lib/test/test_curses.py Thu Jan 12 16:41:15 2006 @@ -173,7 +173,6 @@ curses.qiflush() curses.raw() ; curses.raw(1) curses.setsyx(5,5) - curses.setupterm(fd=sys.__stdout__.fileno()) curses.tigetflag('hc') curses.tigetnum('co') curses.tigetstr('cr') @@ -239,12 +238,14 @@ finally: curses.resetty() - if __name__ == '__main__': curses.wrapper(main) unit_tests() else: try: + # testing setupterm() inside initscr/endwin + # causes terminal breakage + curses.setupterm(fd=sys.__stdout__.fileno()) stdscr = curses.initscr() main(stdscr) finally: From python-checkins at python.org Thu Jan 12 18:01:45 2006 From: python-checkins at python.org (martin.v.loewis) Date: Thu, 12 Jan 2006 18:01:45 +0100 (CET) Subject: [Python-checkins] r42021 - in python/branches/ssize_t: Include/intobject.h Objects/intobject.c Message-ID: <20060112170145.003AA1E4007@bag.python.org> Author: martin.v.loewis Date: Thu Jan 12 18:01:44 2006 New Revision: 42021 Modified: python/branches/ssize_t/Include/intobject.h python/branches/ssize_t/Objects/intobject.c Log: Modified: python/branches/ssize_t/Include/intobject.h ============================================================================== --- python/branches/ssize_t/Include/intobject.h (original) +++ python/branches/ssize_t/Include/intobject.h Thu Jan 12 18:01:44 2006 @@ -32,7 +32,7 @@ PyAPI_FUNC(PyObject *) PyInt_FromString(char*, char**, int); #ifdef Py_USING_UNICODE -PyAPI_FUNC(PyObject *) PyInt_FromUnicode(Py_UNICODE*, int, int); +PyAPI_FUNC(PyObject *) PyInt_FromUnicode(Py_UNICODE*, Py_ssize_t, int); #endif PyAPI_FUNC(PyObject *) PyInt_FromLong(long); PyAPI_FUNC(PyObject *) PyInt_FromSize_t(size_t); Modified: python/branches/ssize_t/Objects/intobject.c 
============================================================================== --- python/branches/ssize_t/Objects/intobject.c (original) +++ python/branches/ssize_t/Objects/intobject.c Thu Jan 12 18:01:44 2006 @@ -188,14 +188,14 @@ Py_ssize_t PyInt_AsSsize_t(register PyObject *op) { + PyNumberMethods *nb; + PyIntObject *io; + Py_ssize_t val; if (op && !PyInt_CheckExact(op) && PyLong_Check(op)) return _PyLong_AsSsize_t(op); #if SIZEOF_SIZE_T==SIZEOF_LONG return PyInt_AsLong(op); #else - PyNumberMethods *nb; - PyIntObject *io; - Py_ssize_t val; if (op && PyInt_Check(op)) return PyInt_AS_LONG((PyIntObject*) op); @@ -216,7 +216,7 @@ if (!PyInt_Check(io)) { if (PyLong_Check(io)) { /* got a long? => retry int conversion */ - val = PyLong_AsSsize_t((PyObject *)io); + val = _PyLong_AsSsize_t((PyObject *)io); Py_DECREF(io); if ((val == -1) && PyErr_Occurred()) return -1; @@ -371,7 +371,7 @@ #ifdef Py_USING_UNICODE PyObject * -PyInt_FromUnicode(Py_UNICODE *s, int length, int base) +PyInt_FromUnicode(Py_UNICODE *s, Py_ssize_t length, int base) { PyObject *result; char *buffer = PyMem_MALLOC(length+1); From python-checkins at python.org Thu Jan 12 18:11:28 2006 From: python-checkins at python.org (martin.v.loewis) Date: Thu, 12 Jan 2006 18:11:28 +0100 (CET) Subject: [Python-checkins] r42022 - in python/branches/ssize_t: Include/longobject.h Objects/longobject.c Message-ID: <20060112171128.DCBE01E4002@bag.python.org> Author: martin.v.loewis Date: Thu Jan 12 18:11:27 2006 New Revision: 42022 Modified: python/branches/ssize_t/Include/longobject.h python/branches/ssize_t/Objects/longobject.c Log: Fix VC2005 warnings. 
Modified: python/branches/ssize_t/Include/longobject.h ============================================================================== --- python/branches/ssize_t/Include/longobject.h (original) +++ python/branches/ssize_t/Include/longobject.h Thu Jan 12 18:11:27 2006 @@ -48,7 +48,7 @@ PyAPI_FUNC(PyObject *) PyLong_FromString(char *, char **, int); #ifdef Py_USING_UNICODE -PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, int, int); +PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int); #endif /* _PyLong_Sign. Return 0 if v is 0, -1 if v < 0, +1 if v > 0. Modified: python/branches/ssize_t/Objects/longobject.c ============================================================================== --- python/branches/ssize_t/Objects/longobject.c (original) +++ python/branches/ssize_t/Objects/longobject.c Thu Jan 12 18:11:27 2006 @@ -51,7 +51,7 @@ long_normalize(register PyLongObject *v) { Py_ssize_t j = ABS(v->ob_size); - register int i = j; + Py_ssize_t i = j; while (i > 0 && v->ob_digit[i-1] == 0) --i; @@ -78,7 +78,7 @@ _PyLong_Copy(PyLongObject *src) { PyLongObject *result; - int i; + Py_ssize_t i; assert(src != NULL); i = src->ob_size; @@ -994,7 +994,7 @@ * x[m-1], and the remaining carry (0 or 1) is returned. */ static digit -v_iadd(digit *x, int m, digit *y, int n) +v_iadd(digit *x, Py_ssize_t m, digit *y, Py_ssize_t n) { int i; digit carry = 0; @@ -1020,7 +1020,7 @@ * far as x[m-1], and the remaining borrow (0 or 1) is returned. */ static digit -v_isub(digit *x, int m, digit *y, int n) +v_isub(digit *x, Py_ssize_t m, digit *y, Py_ssize_t n) { int i; digit borrow = 0; @@ -1077,7 +1077,7 @@ immutable. 
*/ static digit -inplace_divrem1(digit *pout, digit *pin, int size, digit n) +inplace_divrem1(digit *pout, digit *pin, Py_ssize_t size, digit n) { twodigits rem = 0; @@ -1430,7 +1430,7 @@ #ifdef Py_USING_UNICODE PyObject * -PyLong_FromUnicode(Py_UNICODE *u, int length, int base) +PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base) { PyObject *result; char *buffer = PyMem_MALLOC(length+1); @@ -1624,7 +1624,7 @@ static int long_compare(PyLongObject *a, PyLongObject *b) { - int sign; + Py_ssize_t sign; if (a->ob_size != b->ob_size) { if (ABS(a->ob_size) == 0 && ABS(b->ob_size) == 0) @@ -1633,7 +1633,7 @@ sign = a->ob_size - b->ob_size; } else { - int i = ABS(a->ob_size); + Py_ssize_t i = ABS(a->ob_size); while (--i >= 0 && a->ob_digit[i] == b->ob_digit[i]) ; if (i < 0) @@ -1651,7 +1651,8 @@ long_hash(PyLongObject *v) { long x; - int i, sign; + Py_ssize_t i; + int sign; /* This is designed so that Python ints and longs with the same value hash to the same value, otherwise comparisons @@ -1690,7 +1691,7 @@ /* Ensure a is the larger of the two: */ if (size_a < size_b) { { PyLongObject *temp = a; a = b; b = temp; } - { int size_temp = size_a; + { Py_ssize_t size_temp = size_a; size_a = size_b; size_b = size_temp; } } @@ -1718,7 +1719,7 @@ { Py_ssize_t size_a = ABS(a->ob_size), size_b = ABS(b->ob_size); PyLongObject *z; - int i; + Py_ssize_t i; int sign = 1; digit borrow = 0; @@ -1726,7 +1727,7 @@ if (size_a < size_b) { sign = -1; { PyLongObject *temp = a; a = b; b = temp; } - { int size_temp = size_a; + { Py_ssize_t size_temp = size_a; size_a = size_b; size_b = size_temp; } } @@ -1914,7 +1915,7 @@ Returns 0 on success, -1 on failure. 
*/ static int -kmul_split(PyLongObject *n, int size, PyLongObject **high, PyLongObject **low) +kmul_split(PyLongObject *n, Py_ssize_t size, PyLongObject **high, PyLongObject **low) { PyLongObject *hi, *lo; Py_ssize_t size_lo, size_hi; @@ -1955,7 +1956,7 @@ PyLongObject *bl = NULL; PyLongObject *ret = NULL; PyLongObject *t1, *t2, *t3; - int shift; /* the number of digits we split off */ + Py_ssize_t shift; /* the number of digits we split off */ Py_ssize_t i; /* (ah*X+al)(bh*X+bl) = ah*bh*X*X + (ah*bl + al*bh)*X + al*bl @@ -2191,7 +2192,7 @@ nbdone = 0; while (bsize > 0) { PyLongObject *product; - const int nbtouse = MIN(bsize, asize); + const Py_ssize_t nbtouse = MIN(bsize, asize); /* Multiply the next slice of b by a. */ memcpy(bslice->ob_digit, b->ob_digit + nbdone, From python-checkins at python.org Thu Jan 12 18:12:37 2006 From: python-checkins at python.org (martin.v.loewis) Date: Thu, 12 Jan 2006 18:12:37 +0100 (CET) Subject: [Python-checkins] r42023 - python/branches/ssize_t/Objects/object.c Message-ID: <20060112171237.15FDA1E4002@bag.python.org> Author: martin.v.loewis Date: Thu Jan 12 18:12:36 2006 New Revision: 42023 Modified: python/branches/ssize_t/Objects/object.c Log: Add SAFE_CAST Modified: python/branches/ssize_t/Objects/object.c ============================================================================== --- python/branches/ssize_t/Objects/object.c (original) +++ python/branches/ssize_t/Objects/object.c Thu Jan 12 18:12:36 2006 @@ -1432,7 +1432,8 @@ res = (*v->ob_type->tp_as_sequence->sq_length)(v); else return 1; - return (res > 0) ? 1 : (int)res; + /* if it is negative, it should be either -1 or -2 */ + return (res > 0) ? 
1 : Py_SAFE_DOWNCAST(res, Py_ssize_t, int); } /* equivalent of 'not v' From python-checkins at python.org Thu Jan 12 18:13:55 2006 From: python-checkins at python.org (martin.v.loewis) Date: Thu, 12 Jan 2006 18:13:55 +0100 (CET) Subject: [Python-checkins] r42024 - python/branches/ssize_t/Objects/rangeobject.c Message-ID: <20060112171355.5D7AD1E4002@bag.python.org> Author: martin.v.loewis Date: Thu Jan 12 18:13:55 2006 New Revision: 42024 Modified: python/branches/ssize_t/Objects/rangeobject.c Log: Return ssize_t from item(). Modified: python/branches/ssize_t/Objects/rangeobject.c ============================================================================== --- python/branches/ssize_t/Objects/rangeobject.c (original) +++ python/branches/ssize_t/Objects/rangeobject.c Thu Jan 12 18:13:55 2006 @@ -98,7 +98,7 @@ "xrange object index out of range"); return NULL; } - return PyInt_FromLong(r->start + (i % r->len) * r->step); + return PyInt_FromSsize_t(r->start + (i % r->len) * r->step); } static Py_ssize_t From python-checkins at python.org Thu Jan 12 18:34:05 2006 From: python-checkins at python.org (martin.v.loewis) Date: Thu, 12 Jan 2006 18:34:05 +0100 (CET) Subject: [Python-checkins] r42025 - in python/branches/ssize_t: Include/unicodeobject.h Objects/unicodeobject.c Message-ID: <20060112173405.4541E1E4002@bag.python.org> Author: martin.v.loewis Date: Thu Jan 12 18:34:04 2006 New Revision: 42025 Modified: python/branches/ssize_t/Include/unicodeobject.h python/branches/ssize_t/Objects/unicodeobject.c Log: Fix VC2005 warnings. Modified: python/branches/ssize_t/Include/unicodeobject.h ============================================================================== --- python/branches/ssize_t/Include/unicodeobject.h (original) +++ python/branches/ssize_t/Include/unicodeobject.h Thu Jan 12 18:34:04 2006 @@ -432,7 +432,7 @@ /* Get the length of the Unicode object. 
*/ -PyAPI_FUNC(int) PyUnicode_GetSize( +PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( PyObject *unicode /* Unicode object */ ); @@ -524,10 +524,10 @@ possibly trailing 0-termination character) or -1 in case of an error. */ -PyAPI_FUNC(int) PyUnicode_AsWideChar( +PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( PyUnicodeObject *unicode, /* Unicode object */ register wchar_t *w, /* wchar_t buffer */ - int size /* size of buffer */ + Py_ssize_t size /* size of buffer */ ); #endif @@ -995,7 +995,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_Split( PyObject *s, /* String to split */ PyObject *sep, /* String separator */ - int maxsplit /* Maxsplit count */ + Py_ssize_t maxsplit /* Maxsplit count */ ); /* Dito, but split at line breaks. @@ -1024,7 +1024,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_RSplit( PyObject *s, /* String to split */ PyObject *sep, /* String separator */ - int maxsplit /* Maxsplit count */ + Py_ssize_t maxsplit /* Maxsplit count */ ); /* Translate a string by applying a character mapping table to it and @@ -1092,7 +1092,7 @@ PyObject *str, /* String */ PyObject *substr, /* Substring to find */ PyObject *replstr, /* Substring to replace */ - int maxcount /* Max. number of replacements to apply; + Py_ssize_t maxcount /* Max. 
number of replacements to apply; -1 = all */ ); Modified: python/branches/ssize_t/Objects/unicodeobject.c ============================================================================== --- python/branches/ssize_t/Objects/unicodeobject.c (original) +++ python/branches/ssize_t/Objects/unicodeobject.c Thu Jan 12 18:34:04 2006 @@ -378,9 +378,9 @@ return (PyObject *)unicode; } -int PyUnicode_AsWideChar(PyUnicodeObject *unicode, - register wchar_t *w, - int size) +Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode, + wchar_t *w, + Py_ssize_t size) { if (unicode == NULL) { PyErr_BadInternalCall(); @@ -698,7 +698,7 @@ return NULL; } -int PyUnicode_GetSize(PyObject *unicode) +Py_ssize_t PyUnicode_GetSize(PyObject *unicode) { if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); @@ -755,7 +755,7 @@ Py_ssize_t requiredsize; Py_ssize_t newpos; Py_UNICODE *repptr; - int repsize; + Py_ssize_t repsize; int res = -1; if (*errorHandler == NULL) { @@ -2848,11 +2848,11 @@ const char *e; PyUnicodeObject *v; Py_UNICODE *p; - int extrachars = 0; + Py_ssize_t extrachars = 0; PyObject *errorHandler = NULL; PyObject *exc = NULL; Py_UNICODE *mapstring = NULL; - int maplen = 0; + Py_ssize_t maplen = 0; /* Default to Latin-1 */ if (mapping == NULL) @@ -2942,7 +2942,7 @@ continue; } else if (PyUnicode_Check(x)) { - int targetsize = PyUnicode_GET_SIZE(x); + Py_ssize_t targetsize = PyUnicode_GET_SIZE(x); if (targetsize == 1) /* 1-1 mapping */ @@ -2952,8 +2952,8 @@ /* 1-n mapping */ if (targetsize > extrachars) { /* resize first */ - int oldpos = (int)(p - PyUnicode_AS_UNICODE(v)); - int needed = (targetsize - extrachars) + \ + Py_ssize_t oldpos = p - PyUnicode_AS_UNICODE(v); + Py_ssize_t needed = (targetsize - extrachars) + \ (targetsize << 2); extrachars += needed; if (_PyUnicode_Resize(&v, @@ -3076,7 +3076,7 @@ } else { const char *repchars = PyString_AS_STRING(rep); - int repsize = PyString_GET_SIZE(rep); + Py_ssize_t repsize = PyString_GET_SIZE(rep); Py_ssize_t requiredsize = 
*outpos+repsize; if (outsize<requiredsize) { [... rest of this hunk and the header of the next hunk were lost in archive extraction ...] if (requiredsize > oldsize) { /* remember old output position */ - int outpos = *outp-PyUnicode_AS_UNICODE(*outobj); + Py_ssize_t outpos = *outp-PyUnicode_AS_UNICODE(*outobj); /* exponentially overallocate to minimize reallocations */ if (requiredsize < 2 * oldsize) requiredsize = 2 * oldsize; @@ -3461,7 +3461,7 @@ Return 0 on success, -1 on error. */ static int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp, - int insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp, + Py_ssize_t insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp, PyObject **res) { if (charmaptranslate_lookup(*curinp, mapping, res)) @@ -3477,14 +3477,14 @@ *(*outp)++ = (Py_UNICODE)PyInt_AS_LONG(*res); } else if (PyUnicode_Check(*res)) { - int repsize = PyUnicode_GET_SIZE(*res); + Py_ssize_t repsize = PyUnicode_GET_SIZE(*res); if (repsize==1) { /* no overflow check, because we know that the space is enough */ *(*outp)++ = *PyUnicode_AS_UNICODE(*res); } else if (repsize!=0) { /* more than one character */ - int requiredsize = (*outp-PyUnicode_AS_UNICODE(*outobj)) + + Py_ssize_t requiredsize = (*outp-PyUnicode_AS_UNICODE(*outobj)) + (insize - (curinp-startinp)) + repsize - 1; if (charmaptranslate_makespace(outobj, outp, requiredsize)) @@ -3511,7 +3511,7 @@ /* pointer into the output */ Py_UNICODE *str; /* current output position */ - int respos = 0; + Py_ssize_t respos = 0; char *reason = "character maps to <undefined>"; PyObject *errorHandler = NULL; PyObject *exc = NULL; @@ -3546,7 +3546,7 @@ ++p; else { /* untranslatable character */ PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ - int repsize; + Py_ssize_t repsize; Py_ssize_t newpos; Py_UNICODE *uni2; /* startpos for collecting untranslatable chars */ @@ -3688,7 +3688,7 @@ register Py_UNICODE ch = *p; int decimal; PyObject *repunicode; - int repsize; + Py_ssize_t repsize; Py_ssize_t newpos; Py_UNICODE *uni2; Py_UNICODE *collstart; @@ -3795,10 +3795,10 @@ /* --- Helpers
------------------------------------------------------------ */ static -int count(PyUnicodeObject *self, - int start, - int end, - PyUnicodeObject *substring) +Py_ssize_t count(PyUnicodeObject *self, + Py_ssize_t start, + Py_ssize_t end, + PyUnicodeObject *substring) { int count = 0; @@ -3916,7 +3916,7 @@ } static -Py_ssize_t tailmatch(PyUnicodeObject *self, +int tailmatch(PyUnicodeObject *self, PyUnicodeObject *substring, Py_ssize_t start, Py_ssize_t end, @@ -4023,7 +4023,7 @@ static int fixupper(PyUnicodeObject *self) { - int len = self->length; + Py_ssize_t len = self->length; Py_UNICODE *s = self->str; int status = 0; @@ -4044,7 +4044,7 @@ static int fixlower(PyUnicodeObject *self) { - int len = self->length; + Py_ssize_t len = self->length; Py_UNICODE *s = self->str; int status = 0; @@ -4065,7 +4065,7 @@ static int fixswapcase(PyUnicodeObject *self) { - int len = self->length; + Py_ssize_t len = self->length; Py_UNICODE *s = self->str; int status = 0; @@ -4086,7 +4086,7 @@ static int fixcapitalize(PyUnicodeObject *self) { - int len = self->length; + Py_ssize_t len = self->length; Py_UNICODE *s = self->str; int status = 0; @@ -4157,7 +4157,7 @@ size_t res_used; /* # used bytes */ Py_UNICODE *res_p; /* pointer to free byte in res's string area */ PyObject *fseq; /* PySequence_Fast(seq) */ - int seqlen; /* len(fseq) -- number of items in sequence */ + Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */ PyObject *item; int i; @@ -4297,8 +4297,8 @@ static PyUnicodeObject *pad(PyUnicodeObject *self, - int left, - int right, + Py_ssize_t left, + Py_ssize_t right, Py_UNICODE fill) { PyUnicodeObject *u; @@ -4350,11 +4350,11 @@ static PyObject *split_whitespace(PyUnicodeObject *self, PyObject *list, - int maxcount) + Py_ssize_t maxcount) { - register int i; - register int j; - int len = self->length; + register Py_ssize_t i; + register Py_ssize_t j; + Py_ssize_t len = self->length; PyObject *str; for (i = j = 0; i < len; ) { @@ -4386,9 +4386,9 @@ PyObject 
*PyUnicode_Splitlines(PyObject *string, int keepends) { - register int i; - register int j; - int len; + register Py_ssize_t i; + register Py_ssize_t j; + Py_ssize_t len; PyObject *list; PyObject *str; Py_UNICODE *data; @@ -4404,7 +4404,7 @@ goto onError; for (i = j = 0; i < len; ) { - int eol; + Py_ssize_t eol; /* Find a line and append it */ while (i < len && !Py_UNICODE_ISLINEBREAK(data[i])) @@ -4441,11 +4441,11 @@ PyObject *split_char(PyUnicodeObject *self, PyObject *list, Py_UNICODE ch, - int maxcount) + Py_ssize_t maxcount) { - register int i; - register int j; - int len = self->length; + register Py_ssize_t i; + register Py_ssize_t j; + Py_ssize_t len = self->length; PyObject *str; for (i = j = 0; i < len; ) { @@ -4471,12 +4471,12 @@ PyObject *split_substring(PyUnicodeObject *self, PyObject *list, PyUnicodeObject *substring, - int maxcount) + Py_ssize_t maxcount) { - register int i; - register int j; - int len = self->length; - int sublen = substring->length; + register Py_ssize_t i; + register Py_ssize_t j; + Py_ssize_t len = self->length; + Py_ssize_t sublen = substring->length; PyObject *str; for (i = j = 0; i <= len - sublen; ) { @@ -4501,11 +4501,11 @@ static PyObject *rsplit_whitespace(PyUnicodeObject *self, PyObject *list, - int maxcount) + Py_ssize_t maxcount) { - register int i; - register int j; - int len = self->length; + register Py_ssize_t i; + register Py_ssize_t j; + Py_ssize_t len = self->length; PyObject *str; for (i = j = len - 1; i >= 0; ) { @@ -4538,11 +4538,11 @@ PyObject *rsplit_char(PyUnicodeObject *self, PyObject *list, Py_UNICODE ch, - int maxcount) + Py_ssize_t maxcount) { - register int i; - register int j; - int len = self->length; + register Py_ssize_t i; + register Py_ssize_t j; + Py_ssize_t len = self->length; PyObject *str; for (i = j = len - 1; i >= 0; ) { @@ -4568,12 +4568,12 @@ PyObject *rsplit_substring(PyUnicodeObject *self, PyObject *list, PyUnicodeObject *substring, - int maxcount) + Py_ssize_t maxcount) { - register 
int i; - register int j; - int len = self->length; - int sublen = substring->length; + register Py_ssize_t i; + register Py_ssize_t j; + Py_ssize_t len = self->length; + Py_ssize_t sublen = substring->length; PyObject *str; for (i = len - sublen, j = len; i >= 0; ) { @@ -4602,7 +4602,7 @@ static PyObject *split(PyUnicodeObject *self, PyUnicodeObject *substring, - int maxcount) + Py_ssize_t maxcount) { PyObject *list; @@ -4631,7 +4631,7 @@ static PyObject *rsplit(PyUnicodeObject *self, PyUnicodeObject *substring, - int maxcount) + Py_ssize_t maxcount) { PyObject *list; @@ -4661,7 +4661,7 @@ PyObject *replace(PyUnicodeObject *self, PyUnicodeObject *str1, PyUnicodeObject *str2, - int maxcount) + Py_ssize_t maxcount) { PyUnicodeObject *u; @@ -4698,7 +4698,7 @@ } } else { - int n, i; + Py_ssize_t n, i; Py_UNICODE *p; /* replace strings */ @@ -4790,7 +4790,7 @@ { PyObject *list; PyObject *item; - int i; + Py_ssize_t i; /* Split into words */ list = split(self, NULL, -1); @@ -4852,8 +4852,8 @@ static PyObject * unicode_center(PyUnicodeObject *self, PyObject *args) { - int marg, left; - int width; + Py_ssize_t marg, left; + Py_ssize_t width; Py_UNICODE fillchar = ' '; if (!PyArg_ParseTuple(args, "i|O&:center", &width, convert_uc, &fillchar)) @@ -4891,7 +4891,7 @@ static int unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) { - int len1, len2; + Py_ssize_t len1, len2; Py_UNICODE *s1 = str1->str; Py_UNICODE *s2 = str2->str; @@ -4925,7 +4925,7 @@ static int unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) { - register int len1, len2; + register Py_ssize_t len1, len2; Py_UNICODE *s1 = str1->str; Py_UNICODE *s2 = str2->str; @@ -4987,7 +4987,8 @@ PyObject *element) { PyUnicodeObject *u = NULL, *v = NULL; - int result, size; + int result; + Py_ssize_t size; register const Py_UNICODE *lhs, *end, *rhs; /* Coerce the two arguments */ @@ -5202,7 +5203,7 @@ Py_UNICODE *e; Py_UNICODE *p; Py_UNICODE *q; - int i, j; + Py_ssize_t i, j; PyUnicodeObject *u; int 
tabsize = 8; @@ -5277,7 +5278,7 @@ if (substring == NULL) return NULL; - result = PyInt_FromLong(findstring(self, substring, start, end, 1)); + result = PyInt_FromSsize_t(findstring(self, substring, start, end, 1)); Py_DECREF(substring); return result; @@ -5303,7 +5304,7 @@ strings and Unicode objects behave in the same way as dictionary keys. */ - register int len; + register Py_ssize_t len; register Py_UNICODE *p; register long x; @@ -5719,10 +5720,10 @@ _PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj) { Py_UNICODE *s = PyUnicode_AS_UNICODE(self); - int len = PyUnicode_GET_SIZE(self); + Py_ssize_t len = PyUnicode_GET_SIZE(self); Py_UNICODE *sep = PyUnicode_AS_UNICODE(sepobj); - int seplen = PyUnicode_GET_SIZE(sepobj); - int i, j; + Py_ssize_t seplen = PyUnicode_GET_SIZE(sepobj); + Py_ssize_t i, j; i = 0; if (striptype != RIGHTSTRIP) { @@ -5752,7 +5753,7 @@ do_strip(PyUnicodeObject *self, int striptype) { Py_UNICODE *s = PyUnicode_AS_UNICODE(self); - int len = PyUnicode_GET_SIZE(self), i, j; + Py_ssize_t len = PyUnicode_GET_SIZE(self), i, j; i = 0; if (striptype != RIGHTSTRIP) { @@ -5867,7 +5868,7 @@ { PyUnicodeObject *u; Py_UNICODE *p; - int nchars; + Py_ssize_t nchars; size_t nbytes; if (len < 0) @@ -5911,7 +5912,7 @@ PyObject *PyUnicode_Replace(PyObject *obj, PyObject *subobj, PyObject *replobj, - int maxcount) + Py_ssize_t maxcount) { PyObject *self; PyObject *str1; @@ -5954,10 +5955,10 @@ { PyUnicodeObject *str1; PyUnicodeObject *str2; - int maxcount = -1; + Py_ssize_t maxcount = -1; PyObject *result; - if (!PyArg_ParseTuple(args, "OO|i:replace", &str1, &str2, &maxcount)) + if (!PyArg_ParseTuple(args, "OO|n:replace", &str1, &str2, &maxcount)) return NULL; str1 = (PyUnicodeObject *)PyUnicode_FromObject((PyObject *)str1); if (str1 == NULL) @@ -6008,7 +6009,7 @@ if (substring == NULL) return NULL; - result = PyInt_FromLong(findstring(self, substring, start, end, -1)); + result = PyInt_FromSsize_t(findstring(self, substring, start, end, 
-1)); Py_DECREF(substring); return result; @@ -6069,7 +6070,7 @@ } static PyObject* -unicode_slice(PyUnicodeObject *self, int start, int end) +unicode_slice(PyUnicodeObject *self, Py_ssize_t start, Py_ssize_t end) { /* standard clamping */ if (start < 0) @@ -6092,7 +6093,7 @@ PyObject *PyUnicode_Split(PyObject *s, PyObject *sep, - int maxsplit) + Py_ssize_t maxsplit) { PyObject *result; @@ -6126,9 +6127,9 @@ unicode_split(PyUnicodeObject *self, PyObject *args) { PyObject *substring = Py_None; - int maxcount = -1; + Py_ssize_t maxcount = -1; - if (!PyArg_ParseTuple(args, "|Oi:split", &substring, &maxcount)) + if (!PyArg_ParseTuple(args, "|On:split", &substring, &maxcount)) return NULL; if (substring == Py_None) @@ -6141,7 +6142,7 @@ PyObject *PyUnicode_RSplit(PyObject *s, PyObject *sep, - int maxsplit) + Py_ssize_t maxsplit) { PyObject *result; @@ -6176,9 +6177,9 @@ unicode_rsplit(PyUnicodeObject *self, PyObject *args) { PyObject *substring = Py_None; - int maxcount = -1; + Py_ssize_t maxcount = -1; - if (!PyArg_ParseTuple(args, "|Oi:rsplit", &substring, &maxcount)) + if (!PyArg_ParseTuple(args, "|On:rsplit", &substring, &maxcount)) return NULL; if (substring == Py_None) @@ -6263,11 +6264,11 @@ static PyObject * unicode_zfill(PyUnicodeObject *self, PyObject *args) { - int fill; + Py_ssize_t fill; PyUnicodeObject *u; - int width; - if (!PyArg_ParseTuple(args, "i:zfill", &width)) + Py_ssize_t width; + if (!PyArg_ParseTuple(args, "n:zfill", &width)) return NULL; if (self->length >= width) { @@ -6511,7 +6512,7 @@ (objobjargproc)0, /* mp_ass_subscript */ }; -static int +static Py_ssize_t unicode_buffer_getreadbuf(PyUnicodeObject *self, int index, const void **ptr) @@ -6525,8 +6526,8 @@ return PyUnicode_GET_DATA_SIZE(self); } -static int -unicode_buffer_getwritebuf(PyUnicodeObject *self, int index, +static Py_ssize_t +unicode_buffer_getwritebuf(PyUnicodeObject *self, Py_ssize_t index, const void **ptr) { PyErr_SetString(PyExc_TypeError, @@ -6534,18 +6535,18 @@ return -1; 
} -static int +static Py_ssize_t unicode_buffer_getsegcount(PyUnicodeObject *self, - int *lenp) + Py_ssize_t *lenp) { if (lenp) *lenp = PyUnicode_GET_DATA_SIZE(self); return 1; } -static int +static Py_ssize_t unicode_buffer_getcharbuf(PyUnicodeObject *self, - int index, + Py_ssize_t index, const void **ptr) { PyObject *str; @@ -6565,9 +6566,9 @@ /* Helpers for PyUnicode_Format() */ static PyObject * -getnextarg(PyObject *args, int arglen, int *p_argidx) +getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) { - int argidx = *p_argidx; + Py_ssize_t argidx = *p_argidx; if (argidx < arglen) { (*p_argidx)++; if (arglen < 0) @@ -6587,10 +6588,10 @@ #define F_ZERO (1<<4) static -int usprintf(register Py_UNICODE *buffer, char *format, ...) +Py_ssize_t usprintf(register Py_UNICODE *buffer, char *format, ...) { - register int i; - int len; + register Py_ssize_t i; + Py_ssize_t len; va_list va; char *charbuffer; va_start(va, format); @@ -6622,6 +6623,7 @@ worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/ char fmt[20]; double x; + Py_ssize_t result; x = PyFloat_AsDouble(v); if (x == -1.0 && PyErr_Occurred()) @@ -6655,7 +6657,8 @@ PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type); - return usprintf(buf, fmt, x); + result = usprintf(buf, fmt, x); + return Py_SAFE_DOWNCAST(result, Py_ssize_t, int); } static PyObject* @@ -6693,6 +6696,7 @@ char fmt[64]; /* plenty big enough! 
*/ char *sign; long x; + Py_ssize_t result; x = PyInt_AsLong(v); if (x == -1 && PyErr_Occurred()) @@ -6747,9 +6751,10 @@ prec, type); } if (sign[0]) - return usprintf(buf, fmt, -x); + result = usprintf(buf, fmt, -x); else - return usprintf(buf, fmt, x); + result = usprintf(buf, fmt, x); + Py_SAFE_DOWNCAST(result, Py_ssize_t, int); } static int @@ -6816,7 +6821,7 @@ PyObject *args) { Py_UNICODE *fmt, *res; - int fmtcnt, rescnt, reslen, arglen, argidx; + Py_ssize_t fmtcnt, rescnt, reslen, arglen, argidx; int args_owned = 0; PyUnicodeObject *result = NULL; PyObject *dict = NULL; @@ -6865,7 +6870,7 @@ else { /* Got a format specifier */ int flags = 0; - int width = -1; + Py_ssize_t width = -1; int prec = -1; Py_UNICODE c = '\0'; Py_UNICODE fill; @@ -6873,7 +6878,7 @@ PyObject *temp = NULL; Py_UNICODE *pbuf; Py_UNICODE sign; - int len; + Py_ssize_t len; Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */ fmt++; @@ -7271,7 +7276,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyUnicodeObject *tmp, *pnew; - int n; + Py_ssize_t n; assert(PyType_IsSubtype(type, &PyUnicode_Type)); tmp = (PyUnicodeObject *)unicode_new(&PyUnicode_Type, args, kwds); From python-checkins at python.org Thu Jan 12 18:53:28 2006 From: python-checkins at python.org (martin.v.loewis) Date: Thu, 12 Jan 2006 18:53:28 +0100 (CET) Subject: [Python-checkins] r42021 - svn:log Message-ID: <20060112175328.D36A81E4002@bag.python.org> Author: martin.v.loewis Revision: 42021 Property Name: svn:log New Property Value: Fix VC2005 warnings. 
From python-checkins at python.org Fri Jan 13 04:05:25 2006 From: python-checkins at python.org (tim.peters) Date: Fri, 13 Jan 2006 04:05:25 +0100 (CET) Subject: [Python-checkins] r42026 - in python/trunk/Lib/test: crashers/weakref_in_del.py crashers/xml_parsers.py outstanding_bugs.py Message-ID: <20060113030525.E881D1E4025@bag.python.org> Author: tim.peters Date: Fri Jan 13 04:05:25 2006 New Revision: 42026 Modified: python/trunk/Lib/test/crashers/weakref_in_del.py python/trunk/Lib/test/crashers/xml_parsers.py python/trunk/Lib/test/outstanding_bugs.py Log: Whitespace normalization. Modified: python/trunk/Lib/test/crashers/weakref_in_del.py ============================================================================== --- python/trunk/Lib/test/crashers/weakref_in_del.py (original) +++ python/trunk/Lib/test/crashers/weakref_in_del.py Fri Jan 13 04:05:25 2006 @@ -1,17 +1,16 @@ -import weakref - -# http://python.org/sf/1377858 - -ref = None - -def test_weakref_in_del(): - class Target(object): - def __del__(self): - global ref - ref = weakref.ref(self) - - w = Target() - -if __name__ == '__main__': - test_weakref_in_del() - +import weakref + +# http://python.org/sf/1377858 + +ref = None + +def test_weakref_in_del(): + class Target(object): + def __del__(self): + global ref + ref = weakref.ref(self) + + w = Target() + +if __name__ == '__main__': + test_weakref_in_del() Modified: python/trunk/Lib/test/crashers/xml_parsers.py ============================================================================== --- python/trunk/Lib/test/crashers/xml_parsers.py (original) +++ python/trunk/Lib/test/crashers/xml_parsers.py Fri Jan 13 04:05:25 2006 @@ -1,56 +1,56 @@ -from xml.parsers import expat - -# http://python.org/sf/1296433 - -def test_parse_only_xml_data(): - # - xml = "%s" % ('a' * 1025) - # this one doesn't crash - #xml = "%s" % ('a' * 10000) - - def handler(text): - raise Exception - - parser = expat.ParserCreate() - parser.CharacterDataHandler = handler - - try: - 
parser.Parse(xml) - except: - pass - -if __name__ == '__main__': - test_parse_only_xml_data() - -# Invalid read of size 4 -# at 0x43F936: PyObject_Free (obmalloc.c:735) -# by 0x45A7C7: unicode_dealloc (unicodeobject.c:246) -# by 0x1299021D: PyUnknownEncodingHandler (pyexpat.c:1314) -# by 0x12993A66: processXmlDecl (xmlparse.c:3330) -# by 0x12999211: doProlog (xmlparse.c:3678) -# by 0x1299C3F0: prologInitProcessor (xmlparse.c:3550) -# by 0x12991EA3: XML_ParseBuffer (xmlparse.c:1562) -# by 0x1298F8EC: xmlparse_Parse (pyexpat.c:895) -# by 0x47B3A1: PyEval_EvalFrameEx (ceval.c:3565) -# by 0x47CCAC: PyEval_EvalCodeEx (ceval.c:2739) -# by 0x47CDE1: PyEval_EvalCode (ceval.c:490) -# by 0x499820: PyRun_SimpleFileExFlags (pythonrun.c:1198) -# by 0x4117F1: Py_Main (main.c:492) -# by 0x12476D1F: __libc_start_main (in /lib/libc-2.3.5.so) -# by 0x410DC9: (within /home/neal/build/python/svn/clean/python) -# Address 0x12704020 is 264 bytes inside a block of size 592 free'd -# at 0x11B1BA8A: free (vg_replace_malloc.c:235) -# by 0x124B5F18: (within /lib/libc-2.3.5.so) -# by 0x48DE43: find_module (import.c:1320) -# by 0x48E997: import_submodule (import.c:2249) -# by 0x48EC15: load_next (import.c:2083) -# by 0x48F091: import_module_ex (import.c:1914) -# by 0x48F385: PyImport_ImportModuleEx (import.c:1955) -# by 0x46D070: builtin___import__ (bltinmodule.c:44) -# by 0x4186CF: PyObject_Call (abstract.c:1777) -# by 0x474E9B: PyEval_CallObjectWithKeywords (ceval.c:3432) -# by 0x47928E: PyEval_EvalFrameEx (ceval.c:2038) -# by 0x47CCAC: PyEval_EvalCodeEx (ceval.c:2739) -# by 0x47CDE1: PyEval_EvalCode (ceval.c:490) -# by 0x48D0F7: PyImport_ExecCodeModuleEx (import.c:635) -# by 0x48D4F4: load_source_module (import.c:913) +from xml.parsers import expat + +# http://python.org/sf/1296433 + +def test_parse_only_xml_data(): + # + xml = "%s" % ('a' * 1025) + # this one doesn't crash + #xml = "%s" % ('a' * 10000) + + def handler(text): + raise Exception + + parser = expat.ParserCreate() + 
parser.CharacterDataHandler = handler + + try: + parser.Parse(xml) + except: + pass + +if __name__ == '__main__': + test_parse_only_xml_data() + +# Invalid read of size 4 +# at 0x43F936: PyObject_Free (obmalloc.c:735) +# by 0x45A7C7: unicode_dealloc (unicodeobject.c:246) +# by 0x1299021D: PyUnknownEncodingHandler (pyexpat.c:1314) +# by 0x12993A66: processXmlDecl (xmlparse.c:3330) +# by 0x12999211: doProlog (xmlparse.c:3678) +# by 0x1299C3F0: prologInitProcessor (xmlparse.c:3550) +# by 0x12991EA3: XML_ParseBuffer (xmlparse.c:1562) +# by 0x1298F8EC: xmlparse_Parse (pyexpat.c:895) +# by 0x47B3A1: PyEval_EvalFrameEx (ceval.c:3565) +# by 0x47CCAC: PyEval_EvalCodeEx (ceval.c:2739) +# by 0x47CDE1: PyEval_EvalCode (ceval.c:490) +# by 0x499820: PyRun_SimpleFileExFlags (pythonrun.c:1198) +# by 0x4117F1: Py_Main (main.c:492) +# by 0x12476D1F: __libc_start_main (in /lib/libc-2.3.5.so) +# by 0x410DC9: (within /home/neal/build/python/svn/clean/python) +# Address 0x12704020 is 264 bytes inside a block of size 592 free'd +# at 0x11B1BA8A: free (vg_replace_malloc.c:235) +# by 0x124B5F18: (within /lib/libc-2.3.5.so) +# by 0x48DE43: find_module (import.c:1320) +# by 0x48E997: import_submodule (import.c:2249) +# by 0x48EC15: load_next (import.c:2083) +# by 0x48F091: import_module_ex (import.c:1914) +# by 0x48F385: PyImport_ImportModuleEx (import.c:1955) +# by 0x46D070: builtin___import__ (bltinmodule.c:44) +# by 0x4186CF: PyObject_Call (abstract.c:1777) +# by 0x474E9B: PyEval_CallObjectWithKeywords (ceval.c:3432) +# by 0x47928E: PyEval_EvalFrameEx (ceval.c:2038) +# by 0x47CCAC: PyEval_EvalCodeEx (ceval.c:2739) +# by 0x47CDE1: PyEval_EvalCode (ceval.c:490) +# by 0x48D0F7: PyImport_ExecCodeModuleEx (import.c:635) +# by 0x48D4F4: load_source_module (import.c:913) Modified: python/trunk/Lib/test/outstanding_bugs.py ============================================================================== --- python/trunk/Lib/test/outstanding_bugs.py (original) +++ 
python/trunk/Lib/test/outstanding_bugs.py Fri Jan 13 04:05:25 2006 @@ -1,27 +1,27 @@ -# -# This file is for everybody to add tests for bugs that aren't -# fixed yet. Please add a test case and appropriate bug description. -# -# When you fix one of the bugs, please move the test to the correct -# test_ module. -# - -import unittest -from test import test_support - -class TestBug1385040(unittest.TestCase): - def testSyntaxError(self): - import compiler - - # The following snippet gives a SyntaxError in the interpreter - # - # If you compile and exec it, the call foo(7) returns (7, 1) - self.assertRaises(SyntaxError, compiler.compile, - "def foo(a=1, b): return a, b\n\n", "", "exec") - - -def test_main(): - test_support.run_unittest(TestBug1385040) - -if __name__ == "__main__": - test_main() +# +# This file is for everybody to add tests for bugs that aren't +# fixed yet. Please add a test case and appropriate bug description. +# +# When you fix one of the bugs, please move the test to the correct +# test_ module. 
+# + +import unittest +from test import test_support + +class TestBug1385040(unittest.TestCase): + def testSyntaxError(self): + import compiler + + # The following snippet gives a SyntaxError in the interpreter + # + # If you compile and exec it, the call foo(7) returns (7, 1) + self.assertRaises(SyntaxError, compiler.compile, + "def foo(a=1, b): return a, b\n\n", "", "exec") + + +def test_main(): + test_support.run_unittest(TestBug1385040) + +if __name__ == "__main__": + test_main() From python-checkins at python.org Fri Jan 13 04:43:12 2006 From: python-checkins at python.org (david.goodger) Date: Fri, 13 Jan 2006 04:43:12 +0100 (CET) Subject: [Python-checkins] r42027 - peps/trunk Message-ID: <20060113034312.1ACD51E4002@bag.python.org> Author: david.goodger Date: Fri Jan 13 04:43:10 2006 New Revision: 42027 Modified: peps/trunk/ (props changed) Log: pinned external docutils to revision 4262 (release 0.4) From python-checkins at python.org Fri Jan 13 17:59:47 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 13 Jan 2006 17:59:47 +0100 (CET) Subject: [Python-checkins] r42028 - in python/trunk: Lib/warnings.py Misc/NEWS Message-ID: <20060113165947.DA1511E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 13 17:59:46 2006 New Revision: 42028 Modified: python/trunk/Lib/warnings.py python/trunk/Misc/NEWS Log: Bug #1403410: The warnings module now doesn't get confused when it can't find out the module name it generates a warning for. Modified: python/trunk/Lib/warnings.py ============================================================================== --- python/trunk/Lib/warnings.py (original) +++ python/trunk/Lib/warnings.py Fri Jan 13 17:59:46 2006 @@ -63,7 +63,7 @@ def warn_explicit(message, category, filename, lineno, module=None, registry=None): if module is None: - module = filename + module = filename or "" if module[-3:].lower() == ".py": module = module[:-3] # XXX What about leading pathname? 
if registry is None: Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Fri Jan 13 17:59:46 2006 @@ -335,6 +335,9 @@ Library ------- +- Bug #1403410: The warnings module now doesn't get confused + when it can't find out the module name it generates a warning for. + - Patch #1177307: Added a new codec utf_8_sig for UTF-8 with a BOM signature. - Patch #1157027: cookielib mishandles RFC 2109 cookies in Netscape mode From python-checkins at python.org Fri Jan 13 17:59:55 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 13 Jan 2006 17:59:55 +0100 (CET) Subject: [Python-checkins] r42029 - in python/branches/release24-maint: Lib/warnings.py Misc/NEWS Message-ID: <20060113165955.6F5911E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 13 17:59:54 2006 New Revision: 42029 Modified: python/branches/release24-maint/Lib/warnings.py python/branches/release24-maint/Misc/NEWS Log: Bug #1403410: The warnings module now doesn't get confused when it can't find out the module name it generates a warning for. Modified: python/branches/release24-maint/Lib/warnings.py ============================================================================== --- python/branches/release24-maint/Lib/warnings.py (original) +++ python/branches/release24-maint/Lib/warnings.py Fri Jan 13 17:59:54 2006 @@ -63,7 +63,7 @@ def warn_explicit(message, category, filename, lineno, module=None, registry=None): if module is None: - module = filename + module = filename or "" if module[-3:].lower() == ".py": module = module[:-3] # XXX What about leading pathname? 
if registry is None: Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Fri Jan 13 17:59:54 2006 @@ -68,6 +68,9 @@ Library ------- +- Bug #1403410: The warnings module now doesn't get confused + when it can't find out the module name it generates a warning for. + - Patch #1117398: cookielib.LWPCookieJar and .MozillaCookieJar now raise LoadError as documented, instead of IOError. For compatibility, LoadError subclasses IOError. From python-checkins at python.org Fri Jan 13 18:05:58 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 13 Jan 2006 18:05:58 +0100 (CET) Subject: [Python-checkins] r42030 - in python/trunk: Lib/SimpleHTTPServer.py Misc/NEWS Message-ID: <20060113170558.1A5EC1E404C@bag.python.org> Author: georg.brandl Date: Fri Jan 13 18:05:56 2006 New Revision: 42030 Modified: python/trunk/Lib/SimpleHTTPServer.py python/trunk/Misc/NEWS Log: Bug #1394565: SimpleHTTPServer now doesn't choke on query parameters any more. Modified: python/trunk/Lib/SimpleHTTPServer.py ============================================================================== --- python/trunk/Lib/SimpleHTTPServer.py (original) +++ python/trunk/Lib/SimpleHTTPServer.py Fri Jan 13 18:05:56 2006 @@ -14,6 +14,7 @@ import posixpath import BaseHTTPServer import urllib +import urlparse import cgi import shutil import mimetypes @@ -136,6 +137,8 @@ probably be diagnosed.)
""" + # abandon query parameters + path = urlparse.urlparse(path)[2] path = posixpath.normpath(urllib.unquote(path)) words = path.split('/') words = filter(None, words) Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Fri Jan 13 18:05:56 2006 @@ -335,6 +335,9 @@ Library ------- +- Bug #1394565: SimpleHTTPServer now doesn't choke on query paramters + any more. + - Bug #1403410: The warnings module now doesn't get confused when it can't find out the module name it generates a warning for. From python-checkins at python.org Fri Jan 13 18:06:02 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 13 Jan 2006 18:06:02 +0100 (CET) Subject: [Python-checkins] r42031 - in python/branches/release24-maint: Lib/SimpleHTTPServer.py Misc/NEWS Message-ID: <20060113170602.BD1221E4013@bag.python.org> Author: georg.brandl Date: Fri Jan 13 18:06:02 2006 New Revision: 42031 Modified: python/branches/release24-maint/Lib/SimpleHTTPServer.py python/branches/release24-maint/Misc/NEWS Log: Bug #1394565: SimpleHTTPServer now doesn't choke on query paramters any more. Modified: python/branches/release24-maint/Lib/SimpleHTTPServer.py ============================================================================== --- python/branches/release24-maint/Lib/SimpleHTTPServer.py (original) +++ python/branches/release24-maint/Lib/SimpleHTTPServer.py Fri Jan 13 18:06:02 2006 @@ -14,6 +14,7 @@ import posixpath import BaseHTTPServer import urllib +import urlparse import cgi import shutil import mimetypes @@ -132,6 +133,8 @@ probably be diagnosed.) 
""" + # abandon query parameters + path = urlparse.urlparse(path)[2] path = posixpath.normpath(urllib.unquote(path)) words = path.split('/') words = filter(None, words) Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Fri Jan 13 18:06:02 2006 @@ -68,6 +68,9 @@ Library ------- +- Bug #1394565: SimpleHTTPServer now doesn't choke on query paramters + any more. + - Bug #1403410: The warnings module now doesn't get confused when it can't find out the module name it generates a warning for. From python-checkins at python.org Fri Jan 13 22:57:13 2006 From: python-checkins at python.org (phillip.eby) Date: Fri, 13 Jan 2006 22:57:13 +0100 (CET) Subject: [Python-checkins] r42032 - sandbox/trunk/setuptools/setuptools/command/build_ext.py Message-ID: <20060113215713.DBA6A1E4002@bag.python.org> Author: phillip.eby Date: Fri Jan 13 22:57:12 2006 New Revision: 42032 Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py Log: Add experimental code for wrapping relocatable shared libraries on platforms that support the 'dl' module. 
Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Fri Jan 13 22:57:12 2006 @@ -11,6 +11,34 @@ from distutils.ccompiler import new_compiler from distutils.sysconfig import customize_compiler +have_rtld = False +libtype = 'shared' +if os.name != 'nt': + try: + from dl import RTLD_NOW + # XXX not ready for primetime yet: have_rtld = True + except ImportError: + pass + + + + + + + + + + + + + + + + + + + + class build_ext(_build_ext): def run(self): """Build extensions in build directory, then copy if --inplace""" @@ -47,13 +75,26 @@ # Then do any actual SWIG stuff on the remainder return _du_build_ext.swig_sources(self, sources, *otherargs) + + + + + def get_ext_filename(self, fullname): filename = _build_ext.get_ext_filename(self,fullname) - for ext in self.shlibs: + for ext in self.extensions: if self.get_ext_fullname(ext.name)==fullname: - fn, ext = os.path.splitext(filename) - return self.shlib_compiler.library_filename(fn,libtype) - return filename + if isinstance(ext,Library): + fn, ext = os.path.splitext(filename) + return self.shlib_compiler.library_filename(fn,libtype) + elif have_rtld and self.links_to_dynamic(ext): + d,fn = os.path.split(filename) + return os.path.join(d,'dl-'+fn) + else: + return filename + raise AssertionError( + "Filename requested for nonexistent extension", fullname + ) def initialize_options(self): _build_ext.initialize_options(self) @@ -66,16 +107,16 @@ if isinstance(ext,Library)] if self.shlibs: self.setup_shlib_compiler() - self.library_dirs.append(self.build_lib) - def build_extension(self, ext): - _compiler = self.compiler - try: - if isinstance(ext,Library): - self.compiler = self.shlib_compiler - _build_ext.build_extension(self,ext) - finally: - self.compiler = _compiler + + + + + + + + + @@ -121,9 
+162,50 @@ -if os.name=='nt': - # Build shared libraries on Windows - libtype = 'shared' + def build_extension(self, ext): + _compiler = self.compiler + _rpath = ext.runtime_library_path + _ldirs = library_dirs + try: + if isinstance(ext,Library): + self.compiler = self.shlib_compiler + if have_rtld and self.links_to_dynamic(ext): + ext.runtime_library_path = _rpath + [os.curdir] + ext.library_dirs = _ldirs + [ + os.path.dirname( + os.path.join(self.build_lib, + self.get_ext_filename( + self.get_ext_fullname(ext.name) + ) + ) + ) + ] + # XXX if not lib, write .py stub + _build_ext.build_extension(self,ext) + finally: + self.compiler = _compiler + ext.runtime_library_path = _rpath + ext.library_dirs = _ldirs + + + def links_to_dynamic(self, ext): + """Return true if 'ext' links to a dynamic lib in the same package""" + # XXX this should check to ensure the lib is actually being built + # XXX as dynamic, and not just using a locally-found version or a + # XXX static-compiled version + libnames = dict.fromkeys( + [self.get_ext_fullname(lib.name) for lib in self.shlibs] + ) + if not libnames: + return False + pkg = '.'.join(self.get_ext_fullname(ext.name).split('.')[:-1]) + for libname in ext.libraries: + if ('%s.%s' % (pkg,libname)) in libnames: + return True + +if have_rtld or os.name=='nt': + # Build shared libraries + # def link_shared_object(self, objects, output_libname, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, export_symbols=None, debug=0, extra_preargs=None, @@ -137,6 +219,7 @@ else: # Build static libraries everywhere else libtype = 'static' + def link_shared_object(self, objects, output_libname, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, export_symbols=None, debug=0, extra_preargs=None, @@ -161,4 +244,3 @@ ) - From python-checkins at python.org Fri Jan 13 22:59:05 2006 From: python-checkins at python.org (phillip.eby) Date: Fri, 13 Jan 2006 22:59:05 +0100 (CET) Subject: 
[Python-checkins] r42033 - sandbox/trunk/setuptools/setuptools/command/build_ext.py Message-ID: <20060113215905.B86CB1E4002@bag.python.org> Author: phillip.eby Date: Fri Jan 13 22:59:05 2006 New Revision: 42033 Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py Log: Oops. Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Fri Jan 13 22:59:05 2006 @@ -164,13 +164,13 @@ def build_extension(self, ext): _compiler = self.compiler - _rpath = ext.runtime_library_path + _rpath = ext.runtime_library_dirs _ldirs = library_dirs try: if isinstance(ext,Library): self.compiler = self.shlib_compiler if have_rtld and self.links_to_dynamic(ext): - ext.runtime_library_path = _rpath + [os.curdir] + ext.runtime_library_dirs = _rpath + [os.curdir] ext.library_dirs = _ldirs + [ os.path.dirname( os.path.join(self.build_lib, @@ -184,7 +184,7 @@ _build_ext.build_extension(self,ext) finally: self.compiler = _compiler - ext.runtime_library_path = _rpath + ext.runtime_library_dirs = _rpath ext.library_dirs = _ldirs From python-checkins at python.org Fri Jan 13 23:32:57 2006 From: python-checkins at python.org (phillip.eby) Date: Fri, 13 Jan 2006 23:32:57 +0100 (CET) Subject: [Python-checkins] r42034 - sandbox/trunk/setuptools/setuptools/command/bdist_egg.py sandbox/trunk/setuptools/setuptools/command/build_ext.py Message-ID: <20060113223257.C61DA1E4002@bag.python.org> Author: phillip.eby Date: Fri Jan 13 23:32:57 2006 New Revision: 42034 Modified: sandbox/trunk/setuptools/setuptools/command/bdist_egg.py sandbox/trunk/setuptools/setuptools/command/build_ext.py Log: Don't write .py stubs except for actual extensions that don't already have them. 
Modified: sandbox/trunk/setuptools/setuptools/command/bdist_egg.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/bdist_egg.py (original) +++ sandbox/trunk/setuptools/setuptools/command/bdist_egg.py Fri Jan 13 23:32:57 2006 @@ -10,6 +10,7 @@ from distutils import log from pkg_resources import get_platform, Distribution from types import CodeType +from setuptools.extension import Library def write_stub(resource, pyfile): f = open(pyfile,'w') @@ -38,7 +39,6 @@ - class bdist_egg(Command): description = "create an \"egg\" distribution" @@ -174,7 +174,7 @@ cmd = self.call_command('install_lib', warn_dir=0) instcmd.root = old_root - ext_outputs = self.get_ext_outputs() + all_outputs, ext_outputs = self.get_ext_outputs() self.stubs = [] to_compile = [] for (p,ext_name) in enumerate(ext_outputs): @@ -204,11 +204,11 @@ self.call_command('install_scripts', install_dir=script_dir) native_libs = os.path.join(self.egg_info,"native_libs.txt") - if ext_outputs: + if all_outputs: log.info("writing %s" % native_libs) if not self.dry_run: libs_file = open(native_libs, 'wt') - libs_file.write('\n'.join(ext_outputs)) + libs_file.write('\n'.join(all_outputs)) libs_file.write('\n') libs_file.close() elif os.path.isfile(native_libs): @@ -288,28 +288,28 @@ def get_ext_outputs(self): """Get a list of relative paths to C extensions in the output distro""" - outputs = [] + all_outputs = [] + ext_outputs = [] + paths = {self.bdist_dir:''} for base, dirs, files in os.walk(self.bdist_dir): for filename in files: if os.path.splitext(filename)[1].lower() in NATIVE_EXTENSIONS: - outputs.append(paths[base]+filename) + all_outputs.append(paths[base]+filename) for filename in dirs: paths[os.path.join(base,filename)] = paths[base]+filename+'/' - - if not self.distribution.has_ext_modules(): - return outputs - - build_cmd = self.get_finalized_command('build_ext') - prefix_len = len(build_cmd.build_lib) + len(os.sep) - for 
filename in build_cmd.get_outputs(): - if os.path.splitext(filename)[1].lower() not in NATIVE_EXTENSIONS: - # only add files w/unrecognized extensions, since the - # recognized ones will already be in the list - outputs.append(filename[prefix_len:]) + if self.distribution.has_ext_modules(): + build_cmd = self.get_finalized_command('build_ext') + for ext in build_cmd.extensions: + if isinstance(ext,Library): + continue + fullname = build_cmd.get_ext_fullname(ext.name) + filename = build_cmd.get_ext_filename(fullname) + if not os.path.basename(filename).startswith('dl-'): + ext_outputs.append(filename) - return outputs + return all_outputs, ext_outputs NATIVE_EXTENSIONS = dict.fromkeys('.dll .so .dylib .pyd'.split()) Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Fri Jan 13 23:32:57 2006 @@ -165,12 +165,12 @@ def build_extension(self, ext): _compiler = self.compiler _rpath = ext.runtime_library_dirs - _ldirs = library_dirs + _ldirs = ext.library_dirs try: if isinstance(ext,Library): self.compiler = self.shlib_compiler - if have_rtld and self.links_to_dynamic(ext): - ext.runtime_library_dirs = _rpath + [os.curdir] + if self.links_to_dynamic(ext): + if have_rtld: ext.runtime_library_dirs = _rpath + [os.curdir] ext.library_dirs = _ldirs + [ os.path.dirname( os.path.join(self.build_lib, @@ -196,12 +196,12 @@ libnames = dict.fromkeys( [self.get_ext_fullname(lib.name) for lib in self.shlibs] ) - if not libnames: - return False pkg = '.'.join(self.get_ext_fullname(ext.name).split('.')[:-1]) + if pkg: pkg+='.' 
for libname in ext.libraries: - if ('%s.%s' % (pkg,libname)) in libnames: - return True + if pkg+libname in libnames: return True + return False + if have_rtld or os.name=='nt': # Build shared libraries From python-checkins at python.org Sat Jan 14 00:16:59 2006 From: python-checkins at python.org (phillip.eby) Date: Sat, 14 Jan 2006 00:16:59 +0100 (CET) Subject: [Python-checkins] r42035 - sandbox/trunk/setuptools/setuptools/command/build_ext.py Message-ID: <20060113231659.1DF361E4002@bag.python.org> Author: phillip.eby Date: Sat Jan 14 00:16:56 2006 New Revision: 42035 Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py Log: Implement LD_LIBRARY_PATH workaround stub loader Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Sat Jan 14 00:16:56 2006 @@ -16,7 +16,7 @@ if os.name != 'nt': try: from dl import RTLD_NOW - # XXX not ready for primetime yet: have_rtld = True + have_rtld = True except ImportError: pass @@ -82,41 +82,41 @@ def get_ext_filename(self, fullname): filename = _build_ext.get_ext_filename(self,fullname) - for ext in self.extensions: - if self.get_ext_fullname(ext.name)==fullname: - if isinstance(ext,Library): - fn, ext = os.path.splitext(filename) - return self.shlib_compiler.library_filename(fn,libtype) - elif have_rtld and self.links_to_dynamic(ext): - d,fn = os.path.split(filename) - return os.path.join(d,'dl-'+fn) - else: - return filename - raise AssertionError( - "Filename requested for nonexistent extension", fullname - ) + ext = self.ext_map[fullname] + if isinstance(ext,Library): + fn, ext = os.path.splitext(filename) + return self.shlib_compiler.library_filename(fn,libtype) + elif have_rtld and ext.links_to_dynamic: + d,fn = os.path.split(filename) + return os.path.join(d,'dl-'+fn) + else: + return 
filename def initialize_options(self): _build_ext.initialize_options(self) self.shlib_compiler = None self.shlibs = [] + self.ext_map = {} def finalize_options(self): _build_ext.finalize_options(self) + self.check_extensions_list(self.extensions) self.shlibs = [ext for ext in self.extensions or () if isinstance(ext,Library)] if self.shlibs: self.setup_shlib_compiler() - - - - - - - - - - + for ext in self.extensions: + fullname = ext._full_name = self.get_ext_fullname(ext.name) + self.ext_map[fullname] = ext + filename = ext._file_name = self.get_ext_filename(fullname) + ltd = ext._links_to_dynamic = \ + self.shlibs and self.links_to_dynamic(ext) or False + ext._needs_stub = ltd and have_rtld and not isinstance(ext,Library) + libdir = os.path.dirname(os.path.join(self.build_lib,filename)) + if ltd and libdir not in ext.library_dirs: + ext.library_dirs.append(libdir) + if ltd and have_rtld and os.curdir not in ext.runtime_library_dirs: + ext.runtime_library_dirs.append(os.curdir) @@ -164,44 +164,85 @@ def build_extension(self, ext): _compiler = self.compiler - _rpath = ext.runtime_library_dirs - _ldirs = ext.library_dirs try: if isinstance(ext,Library): self.compiler = self.shlib_compiler - if self.links_to_dynamic(ext): - if have_rtld: ext.runtime_library_dirs = _rpath + [os.curdir] - ext.library_dirs = _ldirs + [ - os.path.dirname( - os.path.join(self.build_lib, - self.get_ext_filename( - self.get_ext_fullname(ext.name) - ) - ) - ) - ] - # XXX if not lib, write .py stub _build_ext.build_extension(self,ext) + if ext._needs_stub: + self.write_stub(ext) finally: self.compiler = _compiler - ext.runtime_library_dirs = _rpath - ext.library_dirs = _ldirs + def write_stub(self, ext): + log.info("writing stub loader for %s",ext._full_name) + stub_file = os.path.join(self.build_lib, *ext._full_name.split('.')) + stub_file += '.py' + if not self.dry_run: + f = open(stub_file,'w') + f.write('\n'.join([ + "def __bootstrap__():", + " global __bootstrap__, __file__, __loader__", 
+ " import sys, os, pkg_resources, imp, dl", + " __file__ = pkg_resources.resource_filename(__name__,%r)" + % os.path.basename(ext._file_name), + " del __bootstrap__", + " if '__loader__' in globals():", + " del __loader__", + " old_flags = sys.getdlopenflags()", + " old_dir = os.getcwd()", + " try:", + " os.chdir(os.path.dirname(__file__))", + " sys.setdlopenflags(dl.RTLD_NOW)", + " imp.load_dynamic(__name__,__file__)", + " finally:", + " sys.setdlopenflags(old_flags)", + " os.chdir(old_dir)", + "__bootstrap__()", + "" # terminal \n + ])) + f.close() + self.get_finalized_command('build_py').byte_compile(stub_file) def links_to_dynamic(self, ext): """Return true if 'ext' links to a dynamic lib in the same package""" # XXX this should check to ensure the lib is actually being built # XXX as dynamic, and not just using a locally-found version or a # XXX static-compiled version - libnames = dict.fromkeys( - [self.get_ext_fullname(lib.name) for lib in self.shlibs] - ) - pkg = '.'.join(self.get_ext_fullname(ext.name).split('.')[:-1]) - if pkg: pkg+='.' 
+ libnames = dict.fromkeys([lib._full_name for lib in self.shlibs]) + pkg = '.'.join(ext._full_name.split('.')[:-1]+['']) for libname in ext.libraries: if pkg+libname in libnames: return True return False + def get_outputs(self): + outputs = _build_ext.get_outputs(self) + optimize = self.get_finalized_command('build_py').optimize + for ext in self.extensions: + if ext._needs_stub: + base = os.path.join(self.build_lib, *ext._full_name.split('.')) + outputs.append(base+'.py') + outputs.append(base+'.pyc') + if optimize: + outputs.append(base+'.pyo') + return outputs + + + + + + + + + + + + + + + + + + if have_rtld or os.name=='nt': # Build shared libraries From python-checkins at python.org Sat Jan 14 00:32:56 2006 From: python-checkins at python.org (phillip.eby) Date: Sat, 14 Jan 2006 00:32:56 +0100 (CET) Subject: [Python-checkins] r42036 - sandbox/trunk/setuptools/setuptools/command/build_ext.py Message-ID: <20060113233256.B22331E4007@bag.python.org> Author: phillip.eby Date: Sat Jan 14 00:32:55 2006 New Revision: 42036 Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py Log: Write stub files correctly for build_ext --inplace Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Sat Jan 14 00:32:55 2006 @@ -10,6 +10,7 @@ from setuptools.extension import Library from distutils.ccompiler import new_compiler from distutils.sysconfig import customize_compiler +from distutils import log have_rtld = False libtype = 'shared' @@ -38,8 +39,7 @@ - -class build_ext(_build_ext): +class build_ext(_build_ext): def run(self): """Build extensions in build directory, then copy if --inplace""" old_inplace, self.inplace = self.inplace, 0 @@ -50,7 +50,7 @@ def copy_extensions_to_source(self): build_py = self.get_finalized_command('build_py') - for 
ext in self.extensions or (): + for ext in self.extensions: fullname = self.get_ext_fullname(ext.name) filename = self.get_ext_filename(fullname) modpath = fullname.split('.') @@ -66,6 +66,9 @@ src_filename, dest_filename, verbose=self.verbose, dry_run=self.dry_run ) + if ext._needs_stub: + self.write_stub(package_dir, ext, False) + if _build_ext is not _du_build_ext: # Workaround for problems using some Pyrex versions w/SWIG and/or 2.4 @@ -77,16 +80,13 @@ - - - def get_ext_filename(self, fullname): filename = _build_ext.get_ext_filename(self,fullname) ext = self.ext_map[fullname] if isinstance(ext,Library): fn, ext = os.path.splitext(filename) return self.shlib_compiler.library_filename(fn,libtype) - elif have_rtld and ext.links_to_dynamic: + elif have_rtld and ext._links_to_dynamic: d,fn = os.path.split(filename) return os.path.join(d,'dl-'+fn) else: @@ -100,18 +100,19 @@ def finalize_options(self): _build_ext.finalize_options(self) - self.check_extensions_list(self.extensions) - self.shlibs = [ext for ext in self.extensions or () + self.extensions = self.extensions or [] + self.check_extensions_list() + self.shlibs = [ext for ext in self.extensions if isinstance(ext,Library)] if self.shlibs: self.setup_shlib_compiler() for ext in self.extensions: fullname = ext._full_name = self.get_ext_fullname(ext.name) self.ext_map[fullname] = ext - filename = ext._file_name = self.get_ext_filename(fullname) ltd = ext._links_to_dynamic = \ self.shlibs and self.links_to_dynamic(ext) or False ext._needs_stub = ltd and have_rtld and not isinstance(ext,Library) + filename = ext._file_name = self.get_ext_filename(fullname) libdir = os.path.dirname(os.path.join(self.build_lib,filename)) if ltd and libdir not in ext.library_dirs: ext.library_dirs.append(libdir) @@ -120,11 +121,10 @@ - def setup_shlib_compiler(self): compiler = self.shlib_compiler = new_compiler( compiler=self.compiler, dry_run=self.dry_run, force=self.force - ) + ) customize_compiler(compiler) if sys.platform == 
"darwin": # XXX need to fix up compiler_so:ccshared + linker_so:ldshared too @@ -155,7 +155,7 @@ if isinstance(ext,Library): return ext.export_symbols return _build_ext.get_export_symbols(self,ext) - + @@ -169,14 +169,13 @@ self.compiler = self.shlib_compiler _build_ext.build_extension(self,ext) if ext._needs_stub: - self.write_stub(ext) + self.write_stub(self.build_lib, ext) finally: self.compiler = _compiler - def write_stub(self, ext): + def write_stub(self, output_dir, ext, compile=True): log.info("writing stub loader for %s",ext._full_name) - stub_file = os.path.join(self.build_lib, *ext._full_name.split('.')) - stub_file += '.py' + stub_file = os.path.join(output_dir, *ext._full_name.split('.'))+'.py' if not self.dry_run: f = open(stub_file,'w') f.write('\n'.join([ @@ -201,7 +200,8 @@ "" # terminal \n ])) f.close() - self.get_finalized_command('build_py').byte_compile(stub_file) + if compile: + self.get_finalized_command('build_py').byte_compile(stub_file) def links_to_dynamic(self, ext): """Return true if 'ext' links to a dynamic lib in the same package""" @@ -246,7 +246,7 @@ if have_rtld or os.name=='nt': # Build shared libraries - # + # def link_shared_object(self, objects, output_libname, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, export_symbols=None, debug=0, extra_preargs=None, @@ -272,13 +272,13 @@ #export_symbols=None, extra_preargs=None, extra_postargs=None, #build_temp=None - assert output_dir is None # distutils build_ext doesn't pass this + assert output_dir is None # distutils build_ext doesn't pass this output_dir,filename = os.path.split(output_libname) basename, ext = os.path.splitext(filename) if self.library_filename("x").startswith('lib'): # strip 'lib' prefix; this is kludgy if some platform uses # a different prefix - basename = basename[3:] + basename = basename[3:] self.create_static_lib( objects, basename, output_dir, debug, target_lang From python-checkins at python.org Sat Jan 14 00:52:43 2006 
From: python-checkins at python.org (phillip.eby) Date: Sat, 14 Jan 2006 00:52:43 +0100 (CET) Subject: [Python-checkins] r42037 - sandbox/trunk/setuptools/setuptools/command/build_ext.py sandbox/trunk/setuptools/setuptools/command/install_lib.py Message-ID: <20060113235243.56F111E4007@bag.python.org> Author: phillip.eby Date: Sat Jan 14 00:52:42 2006 New Revision: 42037 Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py sandbox/trunk/setuptools/setuptools/command/install_lib.py Log: Ensure installed stubs get compiled, even if there are no "pure" modules present. Also, don't bother compiling the stub prior to installation. Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Sat Jan 14 00:52:42 2006 @@ -67,7 +67,7 @@ dry_run=self.dry_run ) if ext._needs_stub: - self.write_stub(package_dir, ext, False) + self.write_stub(package_dir or os.curdir, ext) if _build_ext is not _du_build_ext: @@ -101,7 +101,7 @@ def finalize_options(self): _build_ext.finalize_options(self) self.extensions = self.extensions or [] - self.check_extensions_list() + self.check_extensions_list(self.extensions) self.shlibs = [ext for ext in self.extensions if isinstance(ext,Library)] if self.shlibs: @@ -169,12 +169,14 @@ self.compiler = self.shlib_compiler _build_ext.build_extension(self,ext) if ext._needs_stub: - self.write_stub(self.build_lib, ext) + self.write_stub( + self.get_finalized_command('build_py').build_lib, ext + ) finally: self.compiler = _compiler - def write_stub(self, output_dir, ext, compile=True): - log.info("writing stub loader for %s",ext._full_name) + def write_stub(self, output_dir, ext): + log.info("writing stub loader for %s to %s",ext._full_name, output_dir) stub_file = os.path.join(output_dir, *ext._full_name.split('.'))+'.py' if 
not self.dry_run: f = open(stub_file,'w') @@ -200,8 +202,6 @@ "" # terminal \n ])) f.close() - if compile: - self.get_finalized_command('build_py').byte_compile(stub_file) def links_to_dynamic(self, ext): """Return true if 'ext' links to a dynamic lib in the same package""" Modified: sandbox/trunk/setuptools/setuptools/command/install_lib.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/install_lib.py (original) +++ sandbox/trunk/setuptools/setuptools/command/install_lib.py Sat Jan 14 00:52:42 2006 @@ -14,3 +14,12 @@ bytecode_files.append(py_file + "o") return bytecode_files + + + def run(self): + self.build() + outfiles = self.install() + if outfiles is not None: + # always compile, in case we have any extension stubs to deal with + self.byte_compile(outfiles) + From python-checkins at python.org Sat Jan 14 01:12:08 2006 From: python-checkins at python.org (phillip.eby) Date: Sat, 14 Jan 2006 01:12:08 +0100 (CET) Subject: [Python-checkins] r42038 - sandbox/trunk/setuptools/setuptools/command/build_ext.py Message-ID: <20060114001208.0BEF81E400E@bag.python.org> Author: phillip.eby Date: Sat Jan 14 01:12:03 2006 New Revision: 42038 Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py Log: Don't keep the stub .py file around, just the .pyc/.pyo. Don't write a stub to the source tree if it would overwrite an existing .py file. 
Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Sat Jan 14 01:12:03 2006 @@ -67,7 +67,7 @@ dry_run=self.dry_run ) if ext._needs_stub: - self.write_stub(package_dir or os.curdir, ext) + self.write_stub(package_dir or os.curdir, ext, True) if _build_ext is not _du_build_ext: @@ -175,34 +175,6 @@ finally: self.compiler = _compiler - def write_stub(self, output_dir, ext): - log.info("writing stub loader for %s to %s",ext._full_name, output_dir) - stub_file = os.path.join(output_dir, *ext._full_name.split('.'))+'.py' - if not self.dry_run: - f = open(stub_file,'w') - f.write('\n'.join([ - "def __bootstrap__():", - " global __bootstrap__, __file__, __loader__", - " import sys, os, pkg_resources, imp, dl", - " __file__ = pkg_resources.resource_filename(__name__,%r)" - % os.path.basename(ext._file_name), - " del __bootstrap__", - " if '__loader__' in globals():", - " del __loader__", - " old_flags = sys.getdlopenflags()", - " old_dir = os.getcwd()", - " try:", - " os.chdir(os.path.dirname(__file__))", - " sys.setdlopenflags(dl.RTLD_NOW)", - " imp.load_dynamic(__name__,__file__)", - " finally:", - " sys.setdlopenflags(old_flags)", - " os.chdir(old_dir)", - "__bootstrap__()", - "" # terminal \n - ])) - f.close() - def links_to_dynamic(self, ext): """Return true if 'ext' links to a dynamic lib in the same package""" # XXX this should check to ensure the lib is actually being built @@ -231,17 +203,45 @@ - - - - - - - - - - - + def write_stub(self, output_dir, ext, compile=False): + log.info("writing stub loader for %s to %s",ext._full_name, output_dir) + stub_file = os.path.join(output_dir, *ext._full_name.split('.'))+'.py' + if compile and os.path.exists(stub_file): + raise DistutilsError(stub_file+" already exists! 
Please delete.") + if not self.dry_run: + f = open(stub_file,'w') + f.write('\n'.join([ + "def __bootstrap__():", + " global __bootstrap__, __file__, __loader__", + " import sys, os, pkg_resources, imp, dl", + " __file__ = pkg_resources.resource_filename(__name__,%r)" + % os.path.basename(ext._file_name), + " del __bootstrap__", + " if '__loader__' in globals():", + " del __loader__", + " old_flags = sys.getdlopenflags()", + " old_dir = os.getcwd()", + " try:", + " os.chdir(os.path.dirname(__file__))", + " sys.setdlopenflags(dl.RTLD_NOW)", + " imp.load_dynamic(__name__,__file__)", + " finally:", + " sys.setdlopenflags(old_flags)", + " os.chdir(old_dir)", + "__bootstrap__()", + "" # terminal \n + ])) + f.close() + if compile: + from distutils.util import byte_compile + byte_compile([stub_file], optimize=0, + force=True, dry_run=self.dry_run) + optimize = self.get_finalized_command('install_lib').optimize + if optimize > 0: + byte_compile([stub_file], optimize=optimize, + force=True, dry_run=self.dry_run) + if os.path.exists(stub_file) and not self.dry_run: + os.unlink(stub_file) if have_rtld or os.name=='nt': From python-checkins at python.org Sat Jan 14 01:13:21 2006 From: python-checkins at python.org (phillip.eby) Date: Sat, 14 Jan 2006 01:13:21 +0100 (CET) Subject: [Python-checkins] r42039 - sandbox/trunk/setuptools/setuptools/command/build_ext.py Message-ID: <20060114001321.D5E111E400A@bag.python.org> Author: phillip.eby Date: Sat Jan 14 01:13:18 2006 New Revision: 42039 Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py Log: Fix broken import Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Sat Jan 14 01:13:18 2006 @@ -11,6 +11,7 @@ from distutils.ccompiler import new_compiler from distutils.sysconfig import 
customize_compiler from distutils import log +from distutils.errors import * have_rtld = False libtype = 'shared' From python-checkins at python.org Sat Jan 14 02:34:17 2006 From: python-checkins at python.org (phillip.eby) Date: Sat, 14 Jan 2006 02:34:17 +0100 (CET) Subject: [Python-checkins] r42040 - sandbox/trunk/setuptools/setuptools/command/build_ext.py Message-ID: <20060114013417.C03D21E4002@bag.python.org> Author: phillip.eby Date: Sat Jan 14 02:34:13 2006 New Revision: 42040 Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py Log: Attempt to define some reasonable flags for OS X linker. Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Sat Jan 14 02:34:13 2006 @@ -9,7 +9,7 @@ from distutils.file_util import copy_file from setuptools.extension import Library from distutils.ccompiler import new_compiler -from distutils.sysconfig import customize_compiler +from distutils.sysconfig import customize_compiler, _config_vars from distutils import log from distutils.errors import * @@ -39,7 +39,6 @@ - class build_ext(_build_ext): def run(self): """Build extensions in build directory, then copy if --inplace""" @@ -126,10 +125,19 @@ compiler = self.shlib_compiler = new_compiler( compiler=self.compiler, dry_run=self.dry_run, force=self.force ) - customize_compiler(compiler) if sys.platform == "darwin": - # XXX need to fix up compiler_so:ccshared + linker_so:ldshared too - compiler.shared_lib_extension = ".dylib" + tmp = _config_vars.copy() + try: + # XXX Help! I don't have any idea whether these are right... 
+ _config_vars['LDSHARED'] = "-dynamiclib -undefined dynamic_lookup" + _config_vars['CCSHARED'] = " -dynamiclib" + _config_vars['SO'] = ".dylib" + customize_compiler(compiler) + finally: + _config_vars.clear() + _config_vars.update(tmp) + else: + customize_compiler(compiler) if self.include_dirs is not None: compiler.set_include_dirs(self.include_dirs) @@ -152,17 +160,13 @@ # hack so distutils' build_extension() builds a library instead compiler.link_shared_object = link_shared_object.__get__(compiler) + + def get_export_symbols(self, ext): if isinstance(ext,Library): return ext.export_symbols return _build_ext.get_export_symbols(self,ext) - - - - - - def build_extension(self, ext): _compiler = self.compiler try: @@ -199,11 +203,6 @@ outputs.append(base+'.pyo') return outputs - - - - - def write_stub(self, output_dir, ext, compile=False): log.info("writing stub loader for %s to %s",ext._full_name, output_dir) stub_file = os.path.join(output_dir, *ext._full_name.split('.'))+'.py' From python-checkins at python.org Sat Jan 14 08:03:00 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 14 Jan 2006 08:03:00 +0100 (CET) Subject: [Python-checkins] r42041 - in python/branches/release24-maint: Lib/test/output/test_mmap Lib/test/test_mmap.py Misc/ACKS Misc/NEWS Modules/mmapmodule.c Message-ID: <20060114070300.C38F01E4002@bag.python.org> Author: neal.norwitz Date: Sat Jan 14 08:02:53 2006 New Revision: 42041 Modified: python/branches/release24-maint/Lib/test/output/test_mmap python/branches/release24-maint/Lib/test/test_mmap.py python/branches/release24-maint/Misc/ACKS python/branches/release24-maint/Misc/NEWS python/branches/release24-maint/Modules/mmapmodule.c Log: Backport: Fix SF bug #1402308, segfault when using mmap(-1, ...) This didn't crash on Linux, but valgrind complained. I'm not sure if this test is valid on Windows. 
Modified: python/branches/release24-maint/Lib/test/output/test_mmap ============================================================================== --- python/branches/release24-maint/Lib/test/output/test_mmap (original) +++ python/branches/release24-maint/Lib/test/output/test_mmap Sat Jan 14 08:02:53 2006 @@ -31,4 +31,5 @@ Modifying copy-on-write memory map. Ensuring copy-on-write maps cannot be resized. Ensuring invalid access parameter raises exception. + Try opening a bad file descriptor... Test passed Modified: python/branches/release24-maint/Lib/test/test_mmap.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_mmap.py (original) +++ python/branches/release24-maint/Lib/test/test_mmap.py Sat Jan 14 08:02:53 2006 @@ -281,6 +281,14 @@ except OSError: pass + print ' Try opening a bad file descriptor...' + try: + mmap.mmap(-1, 4096) + except mmap.error: + pass + else: + verify(0, 'expected a mmap.error but did not get it') + # Do a tougher .find() test. SF bug 515943 pointed out that, in 2.2, # searching for data with embedded \0 bytes didn't work. f = open(TESTFN, 'w+') Modified: python/branches/release24-maint/Misc/ACKS ============================================================================== --- python/branches/release24-maint/Misc/ACKS (original) +++ python/branches/release24-maint/Misc/ACKS Sat Jan 14 08:02:53 2006 @@ -520,6 +520,7 @@ Neil Schemenauer David Scherer Gregor Schmid +Ralf Schmitt Peter Schneider-Kamp Sam Schulenburg Stefan Schwarzer Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Sat Jan 14 08:02:53 2006 @@ -206,6 +206,8 @@ Extension Modules ----------------- +- Bug #1402308, (possible) segfault when using mmap.mmap(-1, ...) 
+ - Bug #1400822, _curses over{lay,write} doesn't work when passing 6 ints. Also fix ungetmouse() which did not accept arguments properly. The code now conforms to the documented signature. Modified: python/branches/release24-maint/Modules/mmapmodule.c ============================================================================== --- python/branches/release24-maint/Modules/mmapmodule.c (original) +++ python/branches/release24-maint/Modules/mmapmodule.c Sat Jan 14 08:02:53 2006 @@ -914,6 +914,7 @@ #endif m_obj = PyObject_New (mmap_object, &mmap_object_type); if (m_obj == NULL) {return NULL;} + m_obj->data = NULL; m_obj->size = (size_t) map_size; m_obj->pos = (size_t) 0; m_obj->fd = dup(fd); From python-checkins at python.org Sat Jan 14 08:03:47 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 14 Jan 2006 08:03:47 +0100 (CET) Subject: [Python-checkins] r42042 - python/branches/release24-maint/Misc/NEWS Message-ID: <20060114070347.48E4E1E4002@bag.python.org> Author: neal.norwitz Date: Sat Jan 14 08:03:44 2006 New Revision: 42042 Modified: python/branches/release24-maint/Misc/NEWS Log: Fix typo Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Sat Jan 14 08:03:44 2006 @@ -68,7 +68,7 @@ Library ------- -- Bug #1394565: SimpleHTTPServer now doesn't choke on query paramters +- Bug #1394565: SimpleHTTPServer now doesn't choke on query parameters any more. 
- Bug #1403410: The warnings module now doesn't get confused From python-checkins at python.org Sat Jan 14 08:05:16 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 14 Jan 2006 08:05:16 +0100 (CET) Subject: [Python-checkins] r42043 - python/trunk/Misc/NEWS Message-ID: <20060114070516.95F571E4021@bag.python.org> Author: neal.norwitz Date: Sat Jan 14 08:05:13 2006 New Revision: 42043 Modified: python/trunk/Misc/NEWS Log: Fix typo Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Sat Jan 14 08:05:13 2006 @@ -335,7 +335,7 @@ Library ------- -- Bug #1394565: SimpleHTTPServer now doesn't choke on query paramters +- Bug #1394565: SimpleHTTPServer now doesn't choke on query parameters any more. - Bug #1403410: The warnings module now doesn't get confused From python-checkins at python.org Sat Jan 14 11:58:31 2006 From: python-checkins at python.org (armin.rigo) Date: Sat, 14 Jan 2006 11:58:31 +0100 (CET) Subject: [Python-checkins] r42044 - python/trunk/Lib/test/crashers/dangerous_subclassing.py python/trunk/Lib/test/crashers/infinite_rec_1.py python/trunk/Lib/test/crashers/infinite_rec_2.py python/trunk/Lib/test/crashers/infinite_rec_3.py python/trunk/Lib/test/crashers/infinite_rec_4.py python/trunk/Lib/test/crashers/infinite_rec_5.py python/trunk/Lib/test/crashers/loosing_dict_ref.py python/trunk/Lib/test/crashers/modify_dict_attr.py Message-ID: <20060114105831.7E4681E4002@bag.python.org> Author: armin.rigo Date: Sat Jan 14 11:58:30 2006 New Revision: 42044 Added: python/trunk/Lib/test/crashers/dangerous_subclassing.py python/trunk/Lib/test/crashers/infinite_rec_1.py python/trunk/Lib/test/crashers/infinite_rec_2.py python/trunk/Lib/test/crashers/infinite_rec_3.py python/trunk/Lib/test/crashers/infinite_rec_4.py python/trunk/Lib/test/crashers/infinite_rec_5.py python/trunk/Lib/test/crashers/loosing_dict_ref.py 
python/trunk/Lib/test/crashers/modify_dict_attr.py Log: collected my segfaulting Python examples from the SF trackers (is the purpose of the crashers directory to scare people? :-) Added: python/trunk/Lib/test/crashers/dangerous_subclassing.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/dangerous_subclassing.py Sat Jan 14 11:58:30 2006 @@ -0,0 +1,12 @@ + +# http://python.org/sf/1174712 + +import types + +class X(types.ModuleType, str): + """Such a subclassing is incorrectly allowed -- + see the SF bug report for explanations""" + +if __name__ == '__main__': + X('name') # segfault: ModuleType.__init__() reads + # the dict at the wrong offset Added: python/trunk/Lib/test/crashers/infinite_rec_1.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/infinite_rec_1.py Sat Jan 14 11:58:30 2006 @@ -0,0 +1,11 @@ + +# http://python.org/sf/1202533 + +import new, operator + +class A: + pass +A.__mul__ = new.instancemethod(operator.mul, None, A) + +if __name__ == '__main__': + A()*2 # segfault: infinite recursion in C Added: python/trunk/Lib/test/crashers/infinite_rec_2.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/infinite_rec_2.py Sat Jan 14 11:58:30 2006 @@ -0,0 +1,10 @@ + +# http://python.org/sf/1202533 + +class A(str): + __get__ = getattr + +if __name__ == '__main__': + a = A('a') + A.a = a + a.a # segfault: infinite recursion in C Added: python/trunk/Lib/test/crashers/infinite_rec_3.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/infinite_rec_3.py Sat Jan 14 11:58:30 2006 @@ -0,0 +1,9 @@ + +# http://python.org/sf/1202533 + +class A(object): + pass +A.__call__ = A() + +if __name__ == '__main__': + A()() # segfault: infinite recursion in 
C Added: python/trunk/Lib/test/crashers/infinite_rec_4.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/infinite_rec_4.py Sat Jan 14 11:58:30 2006 @@ -0,0 +1,7 @@ + +# http://python.org/sf/1202533 + +if __name__ == '__main__': + lst = [apply] + lst.append(lst) + apply(*lst) # segfault: infinite recursion in C Added: python/trunk/Lib/test/crashers/infinite_rec_5.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/infinite_rec_5.py Sat Jan 14 11:58:30 2006 @@ -0,0 +1,10 @@ + +# http://python.org/sf/1267884 + +import types + +class C: + __str__ = types.InstanceType.__str__ + +if __name__ == '__main__': + str(C()) # segfault: infinite recursion in C Added: python/trunk/Lib/test/crashers/loosing_dict_ref.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/loosing_dict_ref.py Sat Jan 14 11:58:30 2006 @@ -0,0 +1,21 @@ + +# http://python.org/sf/1303614 + +class Strange(object): + def __hash__(self): + return hash('hello') + + def __eq__(self, other): + x.__dict__ = {} # the old x.__dict__ is deallocated + return False + + +class X(object): + pass + +if __name__ == '__main__': + v = 123 + x = X() + x.__dict__ = {Strange(): 42, + 'hello': v+456} + x.hello # segfault: the above dict is accessed after it's deallocated Added: python/trunk/Lib/test/crashers/modify_dict_attr.py ============================================================================== --- (empty file) +++ python/trunk/Lib/test/crashers/modify_dict_attr.py Sat Jan 14 11:58:30 2006 @@ -0,0 +1,19 @@ + +# http://python.org/sf/1303614 + +class Y(object): + pass + +class type_with_modifiable_dict(Y, type): + pass + +class MyClass(object): + """This class has its __dict__ attribute completely exposed: + user code can read, reassign and even delete it. 
+ """ + __metaclass__ = type_with_modifiable_dict + + +if __name__ == '__main__': + del MyClass.__dict__ # if we set tp_dict to NULL, + print MyClass # doing anything with MyClass segfaults From g.brandl-nospam at gmx.net Sat Jan 14 12:25:23 2006 From: g.brandl-nospam at gmx.net (Georg Brandl) Date: Sat, 14 Jan 2006 12:25:23 +0100 Subject: [Python-checkins] r42044 - python/trunk/Lib/test/crashers/dangerous_subclassing.py python/trunk/Lib/test/crashers/infinite_rec_1.py python/trunk/Lib/test/crashers/infinite_rec_2.py python/trunk/Lib/test/crashers/infinite_rec_3.py python/trunk/Lib/test/crashers/infinite_rec_4.py python/trunk/Lib/test/crashers/infinite_rec_5.py python/trunk/Lib/test/crashers/loosing_dict_ref.py python/trunk/Lib/test/crashers/modify_dict_attr.py In-Reply-To: <20060114105831.7E4681E4002@bag.python.org> References: <20060114105831.7E4681E4002@bag.python.org> Message-ID: armin.rigo wrote: > Author: armin.rigo > Date: Sat Jan 14 11:58:30 2006 > New Revision: 42044 > > Added: > python/trunk/Lib/test/crashers/dangerous_subclassing.py > python/trunk/Lib/test/crashers/infinite_rec_1.py > python/trunk/Lib/test/crashers/infinite_rec_2.py > python/trunk/Lib/test/crashers/infinite_rec_3.py > python/trunk/Lib/test/crashers/infinite_rec_4.py > python/trunk/Lib/test/crashers/infinite_rec_5.py > python/trunk/Lib/test/crashers/loosing_dict_ref.py > python/trunk/Lib/test/crashers/modify_dict_attr.py > Log: > collected my segfaulting Python examples from the SF trackers > (is the purpose of the crashers directory to scare people? 
:-) Question is whether to distribute this directory with the stdlib ;) Georg From python-checkins at python.org Sat Jan 14 14:02:54 2006 From: python-checkins at python.org (fredrik.lundh) Date: Sat, 14 Jan 2006 14:02:54 +0100 (CET) Subject: [Python-checkins] r42045 - python/trunk/Doc/ACKS Message-ID: <20060114130254.0C7A01E4002@bag.python.org> Author: fredrik.lundh Date: Sat Jan 14 14:02:53 2006 New Revision: 42045 Modified: python/trunk/Doc/ACKS Log: added kjohnson to the ACKS list Modified: python/trunk/Doc/ACKS ============================================================================== --- python/trunk/Doc/ACKS (original) +++ python/trunk/Doc/ACKS Sat Jan 14 14:02:53 2006 @@ -93,6 +93,7 @@ Jack Jansen Philip H. Jensen Pedro Diaz Jimenez +Kent Johnson Lucas de Jonge Andreas Jung Robert Kern From python-checkins at python.org Sat Jan 14 19:13:03 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sat, 14 Jan 2006 19:13:03 +0100 (CET) Subject: [Python-checkins] r42046 - in python/trunk: Doc/lib/libsocket.tex Misc/ACKS Misc/NEWS Modules/socketmodule.c Modules/socketmodule.h configure configure.in pyconfig.h.in Message-ID: <20060114181303.A2EEA1E4002@bag.python.org> Author: martin.v.loewis Date: Sat Jan 14 19:12:57 2006 New Revision: 42046 Modified: python/trunk/Doc/lib/libsocket.tex python/trunk/Misc/ACKS python/trunk/Misc/NEWS python/trunk/Modules/socketmodule.c python/trunk/Modules/socketmodule.h python/trunk/configure python/trunk/configure.in python/trunk/pyconfig.h.in Log: Patch #1103116: AF_NETLINK sockets basic support. Modified: python/trunk/Doc/lib/libsocket.tex ============================================================================== --- python/trunk/Doc/lib/libsocket.tex (original) +++ python/trunk/Doc/lib/libsocket.tex Sat Jan 14 19:12:57 2006 @@ -68,6 +68,9 @@ configuration. For deterministic behavior use a numeric address in \var{host} portion. 
+\versionadded[2.5]{AF_NETLINK sockets are represented as +pairs \code{\var{pid}, \var{groups}}.} + All errors raise exceptions. The normal exceptions for invalid argument types and out-of-memory conditions can be raised; errors related to socket or address semantics raise the error Modified: python/trunk/Misc/ACKS ============================================================================== --- python/trunk/Misc/ACKS (original) +++ python/trunk/Misc/ACKS Sat Jan 14 19:12:57 2006 @@ -58,6 +58,7 @@ Stephen Bevan Ron Bickers Dominic Binks +Philippe Biondi Stuart Bishop Roy Bixler Martin Bless Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Sat Jan 14 19:12:57 2006 @@ -216,6 +216,8 @@ Extension Modules ----------------- +- Patch #1103116: Basic AF_NETLINK support. + - Bug #1402308, (possible) segfault when using mmap.mmap(-1, ...) - Bug #1400822, _curses over{lay,write} doesn't work when passing 6 ints. Modified: python/trunk/Modules/socketmodule.c ============================================================================== --- python/trunk/Modules/socketmodule.c (original) +++ python/trunk/Modules/socketmodule.c Sat Jan 14 19:12:57 2006 @@ -7,7 +7,7 @@ Limitations: - Only AF_INET, AF_INET6 and AF_UNIX address families are supported in a - portable manner, though AF_PACKET is supported under Linux. + portable manner, though AF_PACKET and AF_NETLINK are supported under Linux. - No read/write operations (use sendall/recv or makefile instead). - Additional restrictions apply on some non-Unix platforms (compensated for by socket.py). 
@@ -954,6 +954,14 @@ } #endif /* AF_UNIX */ +#if defined(AF_NETLINK) + case AF_NETLINK: + { + struct sockaddr_nl *a = (struct sockaddr_nl *) addr; + return Py_BuildValue("ii", a->nl_pid, a->nl_groups); + } +#endif /* AF_NETLINK */ + #ifdef ENABLE_IPV6 case AF_INET6: { @@ -1090,6 +1098,31 @@ } #endif /* AF_UNIX */ +#if defined(AF_NETLINK) + case AF_NETLINK: + { + struct sockaddr_nl* addr; + int pid, groups; + addr = (struct sockaddr_nl *)&(s->sock_addr).nl; + if (!PyTuple_Check(args)) { + PyErr_Format( + PyExc_TypeError, + "getsockaddrarg: " + "AF_NETLINK address must be tuple, not %.500s", + args->ob_type->tp_name); + return 0; + } + if (!PyArg_ParseTuple(args, "II:getsockaddrarg", &pid, &groups)) + return 0; + addr->nl_family = AF_NETLINK; + addr->nl_pid = pid; + addr->nl_groups = groups; + *addr_ret = (struct sockaddr *) addr; + *len_ret = sizeof(*addr); + return 1; + } +#endif + case AF_INET: { struct sockaddr_in* addr; @@ -1286,6 +1319,13 @@ return 1; } #endif /* AF_UNIX */ +#if defined(AF_NETLINK) + case AF_NETLINK: + { + *len_ret = sizeof (struct sockaddr_nl); + return 1; + } +#endif case AF_INET: { @@ -3947,6 +3987,18 @@ #ifdef AF_NETLINK /* */ PyModule_AddIntConstant(m, "AF_NETLINK", AF_NETLINK); + PyModule_AddIntConstant(m, "NETLINK_ROUTE", NETLINK_ROUTE); + PyModule_AddIntConstant(m, "NETLINK_SKIP", NETLINK_SKIP); + PyModule_AddIntConstant(m, "NETLINK_USERSOCK", NETLINK_USERSOCK); + PyModule_AddIntConstant(m, "NETLINK_FIREWALL", NETLINK_FIREWALL); + PyModule_AddIntConstant(m, "NETLINK_TCPDIAG", NETLINK_TCPDIAG); + PyModule_AddIntConstant(m, "NETLINK_NFLOG", NETLINK_NFLOG); + PyModule_AddIntConstant(m, "NETLINK_XFRM", NETLINK_XFRM); + PyModule_AddIntConstant(m, "NETLINK_ARPD", NETLINK_ARPD); + PyModule_AddIntConstant(m, "NETLINK_ROUTE6", NETLINK_ROUTE6); + PyModule_AddIntConstant(m, "NETLINK_IP6_FW", NETLINK_IP6_FW); + PyModule_AddIntConstant(m, "NETLINK_DNRTMSG", NETLINK_DNRTMSG); + PyModule_AddIntConstant(m, "NETLINK_TAPBASE", NETLINK_TAPBASE); #endif 
#ifdef AF_ROUTE /* Alias to emulate 4.4BSD */ Modified: python/trunk/Modules/socketmodule.h ============================================================================== --- python/trunk/Modules/socketmodule.h (original) +++ python/trunk/Modules/socketmodule.h Sat Jan 14 19:12:57 2006 @@ -32,6 +32,12 @@ # undef AF_UNIX #endif +#ifdef HAVE_LINUX_NETLINK_H +# include <linux/netlink.h> +#else +# undef AF_NETLINK +#endif + #ifdef HAVE_BLUETOOTH_BLUETOOTH_H #include <bluetooth/bluetooth.h> #include <bluetooth/rfcomm.h> @@ -78,6 +84,9 @@ #ifdef AF_UNIX struct sockaddr_un un; #endif +#ifdef AF_NETLINK + struct sockaddr_nl nl; +#endif #ifdef ENABLE_IPV6 struct sockaddr_in6 in6; struct sockaddr_storage storage; Modified: python/trunk/configure ============================================================================== --- python/trunk/configure (original) +++ python/trunk/configure Sat Jan 14 19:12:57 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41975 . +# From configure.in Revision: 41984 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.5. # @@ -4609,8 +4609,9 @@ -for ac_header in curses.h dlfcn.h fcntl.h grp.h shadow.h langinfo.h \ -libintl.h ncurses.h poll.h pthread.h \ + +for ac_header in asm/types.h curses.h dlfcn.h fcntl.h grp.h \ +shadow.h langinfo.h libintl.h ncurses.h poll.h pthread.h \ stropts.h termios.h thread.h \ unistd.h utime.h \ sys/audioio.h sys/bsdtty.h sys/file.h sys/loadavg.h sys/lock.h sys/mkdev.h \ @@ -5519,6 +5520,76 @@ done +# On Linux, netlink.h requires asm/types.h + +for ac_header in linux/netlink.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h.
*/ + +#ifdef HAVE_ASM_TYPES_H +#include <asm/types.h> +#endif +#ifdef HAVE_SYS_SOCKET_H +#include <sys/socket.h> +#endif + + +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + # checks for typedefs was_it_defined=no echo "$as_me:$LINENO: checking for clock_t in time.h" >&5 Modified: python/trunk/configure.in ============================================================================== --- python/trunk/configure.in (original) +++ python/trunk/configure.in Sat Jan 14 19:12:57 2006 @@ -994,8 +994,8 @@ # checks for header files AC_HEADER_STDC -AC_CHECK_HEADERS(curses.h dlfcn.h fcntl.h grp.h shadow.h langinfo.h \ -libintl.h ncurses.h poll.h pthread.h \ +AC_CHECK_HEADERS(asm/types.h curses.h dlfcn.h fcntl.h grp.h \ +shadow.h langinfo.h libintl.h ncurses.h poll.h pthread.h \ stropts.h termios.h thread.h \ unistd.h utime.h \
sys/audioio.h sys/bsdtty.h sys/file.h sys/loadavg.h sys/lock.h sys/mkdev.h \ @@ -1014,6 +1014,16 @@ #endif ]) +# On Linux, netlink.h requires asm/types.h +AC_CHECK_HEADERS(linux/netlink.h,,,[ +#ifdef HAVE_ASM_TYPES_H +#include <asm/types.h> +#endif +#ifdef HAVE_SYS_SOCKET_H +#include <sys/socket.h> +#endif +]) + # checks for typedefs was_it_defined=no AC_MSG_CHECKING(for clock_t in time.h) Modified: python/trunk/pyconfig.h.in ============================================================================== --- python/trunk/pyconfig.h.in (original) +++ python/trunk/pyconfig.h.in Sat Jan 14 19:12:57 2006 @@ -37,6 +37,9 @@ /* Define this if your time.h defines altzone. */ #undef HAVE_ALTZONE +/* Define to 1 if you have the <asm/types.h> header file. */ +#undef HAVE_ASM_TYPES_H + /* Define to 1 if you have the `bind_textdomain_codeset' function. */ #undef HAVE_BIND_TEXTDOMAIN_CODESET @@ -290,6 +293,9 @@ /* Define if you have the 'link' function. */ #undef HAVE_LINK +/* Define to 1 if you have the <linux/netlink.h> header file. */ +#undef HAVE_LINUX_NETLINK_H + /* Define this if you have the type long long. */ #undef HAVE_LONG_LONG From python-checkins at python.org Sat Jan 14 20:34:58 2006 From: python-checkins at python.org (phillip.eby) Date: Sat, 14 Jan 2006 20:34:58 +0100 (CET) Subject: [Python-checkins] r42047 - sandbox/trunk/setuptools/setuptools/command/build_ext.py Message-ID: <20060114193458.9F00D1E4002@bag.python.org> Author: phillip.eby Date: Sat Jan 14 20:34:55 2006 New Revision: 42047 Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py Log: Tweak build configuration for OS X, based on suggestions from Michael Twomey on the distutils-sig. (It looks like we may also need to add an '-install_name' or some such to the library link step.)
Modified: sandbox/trunk/setuptools/setuptools/command/build_ext.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/build_ext.py (original) +++ sandbox/trunk/setuptools/setuptools/command/build_ext.py Sat Jan 14 20:34:55 2006 @@ -9,30 +9,30 @@ from distutils.file_util import copy_file from setuptools.extension import Library from distutils.ccompiler import new_compiler -from distutils.sysconfig import customize_compiler, _config_vars +from distutils.sysconfig import customize_compiler, get_config_var +get_config_var("LDSHARED") # make sure _config_vars is initialized +from distutils.sysconfig import _config_vars from distutils import log from distutils.errors import * have_rtld = False +use_stubs = False libtype = 'shared' -if os.name != 'nt': + +if sys.platform == "darwin": + use_stubs = True +elif os.name != 'nt': try: from dl import RTLD_NOW have_rtld = True + use_stubs = True except ImportError: pass - - - - - - - - - - - +def if_dl(s): + if have_rtld: + return s + return '' @@ -86,7 +86,7 @@ if isinstance(ext,Library): fn, ext = os.path.splitext(filename) return self.shlib_compiler.library_filename(fn,libtype) - elif have_rtld and ext._links_to_dynamic: + elif use_stubs and ext._links_to_dynamic: d,fn = os.path.split(filename) return os.path.join(d,'dl-'+fn) else: @@ -111,12 +111,12 @@ self.ext_map[fullname] = ext ltd = ext._links_to_dynamic = \ self.shlibs and self.links_to_dynamic(ext) or False - ext._needs_stub = ltd and have_rtld and not isinstance(ext,Library) + ext._needs_stub = ltd and use_stubs and not isinstance(ext,Library) filename = ext._file_name = self.get_ext_filename(fullname) libdir = os.path.dirname(os.path.join(self.build_lib,filename)) if ltd and libdir not in ext.library_dirs: ext.library_dirs.append(libdir) - if ltd and have_rtld and os.curdir not in ext.runtime_library_dirs: + if ltd and use_stubs and os.curdir not in ext.runtime_library_dirs: 
ext.runtime_library_dirs.append(os.curdir) @@ -129,7 +129,7 @@ tmp = _config_vars.copy() try: # XXX Help! I don't have any idea whether these are right... - _config_vars['LDSHARED'] = "-dynamiclib -undefined dynamic_lookup" + _config_vars['LDSHARED'] = "gcc -Wl,-x -dynamiclib -undefined dynamic_lookup" _config_vars['CCSHARED'] = " -dynamiclib" _config_vars['SO'] = ".dylib" customize_compiler(compiler) @@ -213,20 +213,20 @@ f.write('\n'.join([ "def __bootstrap__():", " global __bootstrap__, __file__, __loader__", - " import sys, os, pkg_resources, imp, dl", + " import sys, os, pkg_resources, imp"+if_dl(", dl"), " __file__ = pkg_resources.resource_filename(__name__,%r)" % os.path.basename(ext._file_name), " del __bootstrap__", " if '__loader__' in globals():", " del __loader__", - " old_flags = sys.getdlopenflags()", + if_dl(" old_flags = sys.getdlopenflags()"), " old_dir = os.getcwd()", " try:", " os.chdir(os.path.dirname(__file__))", - " sys.setdlopenflags(dl.RTLD_NOW)", + if_dl(" sys.setdlopenflags(dl.RTLD_NOW)"), " imp.load_dynamic(__name__,__file__)", " finally:", - " sys.setdlopenflags(old_flags)", + if_dl(" sys.setdlopenflags(old_flags)"), " os.chdir(old_dir)", "__bootstrap__()", "" # terminal \n @@ -244,7 +244,7 @@ os.unlink(stub_file) -if have_rtld or os.name=='nt': +if use_stubs or os.name=='nt': # Build shared libraries # def link_shared_object(self, objects, output_libname, output_dir=None, From nnorwitz at gmail.com Sat Jan 14 22:09:52 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Sat, 14 Jan 2006 13:09:52 -0800 Subject: [Python-checkins] r42046 - in python/trunk: Doc/lib/libsocket.tex Misc/ACKS Misc/NEWS Modules/socketmodule.c Modules/socketmodule.h configure configure.in pyconfig.h.in In-Reply-To: <20060114181303.A2EEA1E4002@bag.python.org> References: <20060114181303.A2EEA1E4002@bag.python.org> Message-ID: > --- python/trunk/Modules/socketmodule.c (original) > +++ python/trunk/Modules/socketmodule.c Sat Jan 14 19:12:57 2006 > @@ -954,6 +954,14 
@@ > } > #endif /* AF_UNIX */ > > +#if defined(AF_NETLINK) > + case AF_NETLINK: > + { > + struct sockaddr_nl *a = (struct sockaddr_nl *) addr; > + return Py_BuildValue("ii", a->nl_pid, a->nl_groups); > + } > @@ -1090,6 +1098,31 @@ > } > #endif /* AF_UNIX */ > > +#if defined(AF_NETLINK) > + case AF_NETLINK: > + { > + struct sockaddr_nl* addr; > + int pid, groups; > [...] > + if (!PyArg_ParseTuple(args, "II:getsockaddrarg", &pid, &groups)) Should the format values for Py_BuildValue (above) be II (2 cap eye's) like the format to Py_ParseTuple? n From nnorwitz at gmail.com Sat Jan 14 22:14:12 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Sat, 14 Jan 2006 13:14:12 -0800 Subject: [Python-checkins] r42044 - python/trunk/Lib/test/crashers/dangerous_subclassing.py python/trunk/Lib/test/crashers/infinite_rec_1.py python/trunk/Lib/test/crashers/infinite_rec_2.py python/trunk/Lib/test/crashers/infinite_rec_3.py python/trunk/Lib Message-ID: On 1/14/06, armin.rigo wrote: > > collected my segfaulting Python examples from the SF trackers > (is the purpose of the crashers directory to scare people? :-) No, it's to demonstrate how evil you can be. ;-) Thanks! 
n From python-checkins at python.org Sat Jan 14 22:26:27 2006 From: python-checkins at python.org (neal.norwitz) Date: Sat, 14 Jan 2006 22:26:27 +0100 (CET) Subject: [Python-checkins] r42048 - python/trunk/Misc/ACKS Message-ID: <20060114212627.EF3C51E400B@bag.python.org> Author: neal.norwitz Date: Sat Jan 14 22:26:22 2006 New Revision: 42048 Modified: python/trunk/Misc/ACKS Log: Alphabetize some names and remove a dupe Modified: python/trunk/Misc/ACKS ============================================================================== --- python/trunk/Misc/ACKS (original) +++ python/trunk/Misc/ACKS Sat Jan 14 22:26:22 2006 @@ -89,9 +89,9 @@ Jan-Hein B"uhrman Dick Bulterman Bill Bumgarner +Jimmy Burgett Tommy Burnette Roger Burnham -Jimmy Burgett Alastair Burt Tarn Weisner Burton Lee Busby @@ -378,8 +378,8 @@ Jason Lowe Tony Lownds Ray Loyzaga -Fredrik Lundh Loren Luke +Fredrik Lundh Mark Lutz Jim Lynch Mikael Lyngvig @@ -522,6 +522,7 @@ Sam Rushing Mark Russell Nick Russo +Hajime Saitou Rich Salz Kevin Samborn Ty Sarna @@ -565,27 +566,25 @@ Per Spilling Joshua Spoerri Noah Spurrier -Oliver Steele -Greg Stein -Chris Stern -Michael Stone -Dan Stromberg -Nathan Sullivan -Mark Summerfield -Hisao Suzuki -Kalle Svensson -Hajime Saitou Nathan Srebro RajGopal Srinivasan Jim St. Pierre Quentin Stafford-Fraser Frank Stajano +Oliver Steele +Greg Stein +Chris Stern Richard Stoakley -Casper Stoel Peter Stoehr +Casper Stoel Michael Stone Ken Stox +Dan Stromberg Daniel Stutzbach +Nathan Sullivan +Mark Summerfield +Hisao Suzuki +Kalle Svensson Paul Swartz Thenault Sylvain Geoff Talvola @@ -625,15 +624,15 @@ Norman Vine Frank Visser Niki W. 
Waibel +Wojtek Walczak Charles Waldman Richard Walker Larry Wall -Wojtek Walczak Greg Ward Barry Warsaw Steve Waterbury -Aaron Watters Bob Watson +Aaron Watters Henrik Weber Corran Webster Zack Weinberg @@ -664,9 +663,9 @@ Doug Wyatt Ka-Ping Yee Bob Yodlowski +Danny Yoo George Yoshida Masazumi Yoshikawa -Danny Yoo Bernard Yue Moshe Zadka Milan Zamazal From python-checkins at python.org Sun Jan 15 03:58:00 2006 From: python-checkins at python.org (neal.norwitz) Date: Sun, 15 Jan 2006 03:58:00 +0100 (CET) Subject: [Python-checkins] r42049 - python/trunk/Doc/lib/libsocket.tex Message-ID: <20060115025800.B65241E4002@bag.python.org> Author: neal.norwitz Date: Sun Jan 15 03:57:57 2006 New Revision: 42049 Modified: python/trunk/Doc/lib/libsocket.tex Log: Remove extra period at end. Modified: python/trunk/Doc/lib/libsocket.tex ============================================================================== --- python/trunk/Doc/lib/libsocket.tex (original) +++ python/trunk/Doc/lib/libsocket.tex Sun Jan 15 03:57:57 2006 @@ -69,7 +69,7 @@ \var{host} portion. \versionadded[2.5]{AF_NETLINK sockets are represented as -pairs \code{\var{pid}, \var{groups}}.} +pairs \code{\var{pid}, \var{groups}}} All errors raise exceptions. The normal exceptions for invalid argument types and out-of-memory conditions can be raised; errors From python-checkins at python.org Sun Jan 15 08:00:52 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 15 Jan 2006 08:00:52 +0100 (CET) Subject: [Python-checkins] r42050 - peps/trunk/pep-0353.txt Message-ID: <20060115070052.C139F1E4002@bag.python.org> Author: martin.v.loewis Date: Sun Jan 15 08:00:46 2006 New Revision: 42050 Modified: peps/trunk/pep-0353.txt (props changed) Log: Add keywords. 
From python-checkins at python.org Sun Jan 15 08:26:56 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 15 Jan 2006 08:26:56 +0100 (CET) Subject: [Python-checkins] r42050 - peps/trunk/pep-0353.txt Message-ID: <20060115072656.71CF81E4002@bag.python.org> Author: martin.v.loewis Date: Sun Jan 15 08:00:46 2006 New Revision: 42050 Modified: peps/trunk/pep-0353.txt (props changed) Log: Add keywords. From python-checkins at python.org Sun Jan 15 08:27:56 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 15 Jan 2006 08:27:56 +0100 (CET) Subject: [Python-checkins] r42050 - peps/trunk/pep-0353.txt Message-ID: <20060115072756.37D1A1E4002@bag.python.org> Author: martin.v.loewis Date: Sun Jan 15 08:00:46 2006 New Revision: 42050 Modified: peps/trunk/pep-0353.txt (props changed) Log: Add keywords. From python-checkins at python.org Sun Jan 15 08:29:32 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 15 Jan 2006 08:29:32 +0100 (CET) Subject: [Python-checkins] r42051 - peps/trunk/Makefile Message-ID: <20060115072932.8BB771E4002@bag.python.org> Author: martin.v.loewis Date: Sun Jan 15 08:29:30 2006 New Revision: 42051 Modified: peps/trunk/Makefile Log: Clean install: rule. Modified: peps/trunk/Makefile ============================================================================== --- peps/trunk/Makefile (original) +++ peps/trunk/Makefile Sun Jan 15 08:29:30 2006 @@ -17,7 +17,7 @@ $(TARGETS): pep2html.py install: - $(PEP2HTML) -i + echo "Installing is not necessary anymore. It will be done in post-commit." 
clean: -rm *.html From python-checkins at python.org Sun Jan 15 08:49:21 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 15 Jan 2006 08:49:21 +0100 (CET) Subject: [Python-checkins] r42052 - python/trunk/Modules/socketmodule.c Message-ID: <20060115074921.8941A1E401F@bag.python.org> Author: martin.v.loewis Date: Sun Jan 15 08:49:20 2006 New Revision: 42052 Modified: python/trunk/Modules/socketmodule.c Log: Change build format of netlink addresses. Modified: python/trunk/Modules/socketmodule.c ============================================================================== --- python/trunk/Modules/socketmodule.c (original) +++ python/trunk/Modules/socketmodule.c Sun Jan 15 08:49:20 2006 @@ -958,7 +958,7 @@ case AF_NETLINK: { struct sockaddr_nl *a = (struct sockaddr_nl *) addr; - return Py_BuildValue("ii", a->nl_pid, a->nl_groups); + return Py_BuildValue("II", a->nl_pid, a->nl_groups); } #endif /* AF_NETLINK */ From martin at v.loewis.de Sun Jan 15 08:50:28 2006 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Sun, 15 Jan 2006 08:50:28 +0100 Subject: [Python-checkins] r42046 - in python/trunk: Doc/lib/libsocket.tex Misc/ACKS Misc/NEWS Modules/socketmodule.c Modules/socketmodule.h configure configure.in pyconfig.h.in In-Reply-To: References: <20060114181303.A2EEA1E4002@bag.python.org> Message-ID: <43C9FEC4.5030307@v.loewis.de> Neal Norwitz wrote: > Should the format values for Py_BuildValue (above) be II (2 cap eye's) > like the format to Py_ParseTuple? Right. Fixed in 42052. 
Regards, Martin From python-checkins at python.org Sun Jan 15 08:55:47 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 15 Jan 2006 08:55:47 +0100 (CET) Subject: [Python-checkins] r42054 - in python/branches/release24-maint/Doc: lib/emailutil.tex lib/libcmd.tex lib/libfuncs.tex lib/libos.tex lib/librandom.tex ref/ref2.tex Message-ID: <20060115075547.5255D1E400D@bag.python.org> Author: georg.brandl Date: Sun Jan 15 08:55:42 2006 New Revision: 42054 Modified: python/branches/release24-maint/Doc/lib/emailutil.tex python/branches/release24-maint/Doc/lib/libcmd.tex python/branches/release24-maint/Doc/lib/libfuncs.tex python/branches/release24-maint/Doc/lib/libos.tex python/branches/release24-maint/Doc/lib/librandom.tex python/branches/release24-maint/Doc/ref/ref2.tex Log: Remove dots in \version{added,changed} arguments. Modified: python/branches/release24-maint/Doc/lib/emailutil.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/emailutil.tex (original) +++ python/branches/release24-maint/Doc/lib/emailutil.tex Sun Jan 15 08:55:42 2006 @@ -148,10 +148,10 @@ \end{funcdesc} \versionchanged[The \function{dump_address_pair()} function has been removed; -use \function{formataddr()} instead.]{2.4} +use \function{formataddr()} instead]{2.4} \versionchanged[The \function{decode()} function has been removed; use the -\method{Header.decode_header()} method instead.]{2.4} +\method{Header.decode_header()} method instead]{2.4} \versionchanged[The \function{encode()} function has been removed; use the -\method{Header.encode()} method instead.]{2.4} +\method{Header.encode()} method instead]{2.4} Modified: python/branches/release24-maint/Doc/lib/libcmd.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libcmd.tex (original) +++ python/branches/release24-maint/Doc/lib/libcmd.tex Sun Jan 15 08:55:42 2006 @@ -29,7 +29,7 
@@ instance will use for input and output. If not specified, they will default to \var{sys.stdin} and \var{sys.stdout}. -\versionchanged[The \var{stdin} and \var{stdout} parameters were added.]{2.3} +\versionchanged[The \var{stdin} and \var{stdout} parameters were added]{2.3} \end{classdesc} \subsection{Cmd Objects} Modified: python/branches/release24-maint/Doc/lib/libfuncs.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libfuncs.tex (original) +++ python/branches/release24-maint/Doc/lib/libfuncs.tex Sun Jan 15 08:55:42 2006 @@ -510,7 +510,7 @@ \begin{funcdesc}{hex}{x} Convert an integer number (of any size) to a hexadecimal string. The result is a valid Python expression. - \versionchanged[Formerly only returned an unsigned literal.]{2.4} + \versionchanged[Formerly only returned an unsigned literal]{2.4} \end{funcdesc} \begin{funcdesc}{id}{object} @@ -670,7 +670,7 @@ \begin{funcdesc}{oct}{x} Convert an integer number (of any size) to an octal string. The result is a valid Python expression. - \versionchanged[Formerly only returned an unsigned literal.]{2.4} + \versionchanged[Formerly only returned an unsigned literal]{2.4} \end{funcdesc} \begin{funcdesc}{open}{filename\optional{, mode\optional{, bufsize}}} @@ -1133,7 +1133,7 @@ \versionchanged[Formerly, \function{zip()} required at least one argument and \code{zip()} raised a \exception{TypeError} instead of returning - an empty list.]{2.4} + an empty list]{2.4} \end{funcdesc} Modified: python/branches/release24-maint/Doc/lib/libos.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libos.tex (original) +++ python/branches/release24-maint/Doc/lib/libos.tex Sun Jan 15 08:55:42 2006 @@ -790,7 +790,7 @@ Availability: Macintosh, \UNIX, Windows. 
\versionchanged[On Windows NT/2k/XP and Unix, if \var{path} is a Unicode -object, the result will be a list of Unicode objects.]{2.3} +object, the result will be a list of Unicode objects]{2.3} \end{funcdesc} \begin{funcdesc}{lstat}{path} Modified: python/branches/release24-maint/Doc/lib/librandom.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/librandom.tex (original) +++ python/branches/release24-maint/Doc/lib/librandom.tex Sun Jan 15 08:55:42 2006 @@ -97,7 +97,7 @@ \versionadded{2.1} \versionchanged[Instead of jumping to a specific state, \var{n} steps ahead, \method{jumpahead(\var{n})} jumps to another state likely to be - separated by many steps.]{2.3} + separated by many steps]{2.3} \end{funcdesc} \begin{funcdesc}{getrandbits}{k} Modified: python/branches/release24-maint/Doc/ref/ref2.tex ============================================================================== --- python/branches/release24-maint/Doc/ref/ref2.tex (original) +++ python/branches/release24-maint/Doc/ref/ref2.tex Sun Jan 15 08:55:42 2006 @@ -9,7 +9,7 @@ Python uses the 7-bit \ASCII{} character set for program text. 
\versionadded[An encoding declaration can be used to indicate that -string literals and comments use an encoding different from ASCII.]{2.3} +string literals and comments use an encoding different from ASCII]{2.3} For compatibility with older versions, Python only warns if it finds 8-bit characters; those warnings should be corrected by either declaring an explicit encoding, or using escape sequences if those bytes are binary From python-checkins at python.org Sun Jan 15 08:55:47 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 15 Jan 2006 08:55:47 +0100 (CET) Subject: [Python-checkins] r42053 - in python/trunk/Doc: lib/emailutil.tex lib/libcmd.tex lib/libfuncs.tex lib/libos.tex lib/librandom.tex lib/libsocket.tex ref/ref2.tex Message-ID: <20060115075547.5881F1E401D@bag.python.org> Author: georg.brandl Date: Sun Jan 15 08:55:35 2006 New Revision: 42053 Modified: python/trunk/Doc/lib/emailutil.tex python/trunk/Doc/lib/libcmd.tex python/trunk/Doc/lib/libfuncs.tex python/trunk/Doc/lib/libos.tex python/trunk/Doc/lib/librandom.tex python/trunk/Doc/lib/libsocket.tex python/trunk/Doc/ref/ref2.tex Log: Remove dots in \version{changed,added} argument and correct argument order. 
Modified: python/trunk/Doc/lib/emailutil.tex ============================================================================== --- python/trunk/Doc/lib/emailutil.tex (original) +++ python/trunk/Doc/lib/emailutil.tex Sun Jan 15 08:55:35 2006 @@ -148,10 +148,10 @@ \end{funcdesc} \versionchanged[The \function{dump_address_pair()} function has been removed; -use \function{formataddr()} instead.]{2.4} +use \function{formataddr()} instead]{2.4} \versionchanged[The \function{decode()} function has been removed; use the -\method{Header.decode_header()} method instead.]{2.4} +\method{Header.decode_header()} method instead]{2.4} \versionchanged[The \function{encode()} function has been removed; use the -\method{Header.encode()} method instead.]{2.4} +\method{Header.encode()} method instead]{2.4} Modified: python/trunk/Doc/lib/libcmd.tex ============================================================================== --- python/trunk/Doc/lib/libcmd.tex (original) +++ python/trunk/Doc/lib/libcmd.tex Sun Jan 15 08:55:35 2006 @@ -29,7 +29,7 @@ instance will use for input and output. If not specified, they will default to \var{sys.stdin} and \var{sys.stdout}. -\versionchanged[The \var{stdin} and \var{stdout} parameters were added.]{2.3} +\versionchanged[The \var{stdin} and \var{stdout} parameters were added]{2.3} \end{classdesc} \subsection{Cmd Objects} Modified: python/trunk/Doc/lib/libfuncs.tex ============================================================================== --- python/trunk/Doc/lib/libfuncs.tex (original) +++ python/trunk/Doc/lib/libfuncs.tex Sun Jan 15 08:55:35 2006 @@ -536,7 +536,7 @@ \begin{funcdesc}{hex}{x} Convert an integer number (of any size) to a hexadecimal string. The result is a valid Python expression. 
- \versionchanged[Formerly only returned an unsigned literal.]{2.4} + \versionchanged[Formerly only returned an unsigned literal]{2.4} \end{funcdesc} \begin{funcdesc}{id}{object} @@ -708,7 +708,7 @@ \begin{funcdesc}{oct}{x} Convert an integer number (of any size) to an octal string. The result is a valid Python expression. - \versionchanged[Formerly only returned an unsigned literal.]{2.4} + \versionchanged[Formerly only returned an unsigned literal]{2.4} \end{funcdesc} \begin{funcdesc}{open}{filename\optional{, mode\optional{, bufsize}}} @@ -1171,7 +1171,7 @@ \versionchanged[Formerly, \function{zip()} required at least one argument and \code{zip()} raised a \exception{TypeError} instead of returning - an empty list.]{2.4} + an empty list]{2.4} \end{funcdesc} Modified: python/trunk/Doc/lib/libos.tex ============================================================================== --- python/trunk/Doc/lib/libos.tex (original) +++ python/trunk/Doc/lib/libos.tex Sun Jan 15 08:55:35 2006 @@ -810,7 +810,7 @@ Availability: Macintosh, \UNIX, Windows. 
\versionchanged[On Windows NT/2k/XP and \UNIX, if \var{path} is a Unicode -object, the result will be a list of Unicode objects.]{2.3} +object, the result will be a list of Unicode objects]{2.3} \end{funcdesc} \begin{funcdesc}{lstat}{path} Modified: python/trunk/Doc/lib/librandom.tex ============================================================================== --- python/trunk/Doc/lib/librandom.tex (original) +++ python/trunk/Doc/lib/librandom.tex Sun Jan 15 08:55:35 2006 @@ -97,7 +97,7 @@ \versionadded{2.1} \versionchanged[Instead of jumping to a specific state, \var{n} steps ahead, \method{jumpahead(\var{n})} jumps to another state likely to be - separated by many steps.]{2.3} + separated by many steps]{2.3} \end{funcdesc} \begin{funcdesc}{getrandbits}{k} Modified: python/trunk/Doc/lib/libsocket.tex ============================================================================== --- python/trunk/Doc/lib/libsocket.tex (original) +++ python/trunk/Doc/lib/libsocket.tex Sun Jan 15 08:55:35 2006 @@ -68,8 +68,8 @@ configuration. For deterministic behavior use a numeric address in \var{host} portion. -\versionadded[2.5]{AF_NETLINK sockets are represented as -pairs \code{\var{pid}, \var{groups}}} +\versionadded[AF_NETLINK sockets are represented as +pairs \code{\var{pid}, \var{groups}}]{2.5} All errors raise exceptions. The normal exceptions for invalid argument types and out-of-memory conditions can be raised; errors Modified: python/trunk/Doc/ref/ref2.tex ============================================================================== --- python/trunk/Doc/ref/ref2.tex (original) +++ python/trunk/Doc/ref/ref2.tex Sun Jan 15 08:55:35 2006 @@ -9,7 +9,7 @@ Python uses the 7-bit \ASCII{} character set for program text. 
\versionadded[An encoding declaration can be used to indicate that -string literals and comments use an encoding different from ASCII.]{2.3} +string literals and comments use an encoding different from ASCII]{2.3} For compatibility with older versions, Python only warns if it finds 8-bit characters; those warnings should be corrected by either declaring an explicit encoding, or using escape sequences if those bytes are binary From python-checkins at python.org Sun Jan 15 10:26:45 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 15 Jan 2006 10:26:45 +0100 (CET) Subject: [Python-checkins] r42055 - python/trunk/PCbuild/_elementtree.vcproj python/trunk/PCbuild/pythoncore.vcproj Message-ID: <20060115092645.489B81E4002@bag.python.org> Author: martin.v.loewis Date: Sun Jan 15 10:26:43 2006 New Revision: 42055 Modified: python/trunk/PCbuild/_elementtree.vcproj python/trunk/PCbuild/pythoncore.vcproj Log: Patch #1396093: Further .vcproj cleanups Modified: python/trunk/PCbuild/_elementtree.vcproj ============================================================================== --- python/trunk/PCbuild/_elementtree.vcproj (original) +++ python/trunk/PCbuild/_elementtree.vcproj Sun Jan 15 10:26:43 2006 @@ -25,10 +25,6 @@ PreprocessorDefinitions="_DEBUG;HAVE_EXPAT_H;WIN32;_WINDOWS;XML_NS;XML_DTD;BYTEORDER=1234;XML_CONTEXT_BYTES=1024;USE_PYEXPAT_CAPI;XML_STATIC;HAVE_MEMMOVE" RuntimeLibrary="3" UsePrecompiledHeader="2" - PrecompiledHeaderFile=".\x86-temp-debug\_elementtree/_elementtree.pch" - AssemblerListingLocation=".\x86-temp-debug\_elementtree/" - ObjectFile=".\x86-temp-debug\_elementtree/" - ProgramDataBaseFileName=".\x86-temp-debug\_elementtree/" WarningLevel="3" SuppressStartupBanner="TRUE" DebugInformationFormat="3" @@ -48,13 +44,7 @@ ImportLibrary=".\./_elementtree_d.lib" TargetMachine="1"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + Name="VCResourceCompilerTool"/> + Name="VCMIDLTool"/> + 
Name="VCResourceCompilerTool"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Modified: python/trunk/PCbuild/pythoncore.vcproj ============================================================================== --- python/trunk/PCbuild/pythoncore.vcproj (original) +++ python/trunk/PCbuild/pythoncore.vcproj Sun Jan 15 10:26:43 2006 @@ -304,114 +304,18 @@
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -421,327 +325,39 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -754,1755 +370,255 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + PreprocessorDefinitions="BUILD=\"60\""/> + PreprocessorDefinitions="BUILD=\"60\""/> + PreprocessorDefinitions="BUILD=\"60\""/> + RelativePath="..\Python\getcompiler.c"> + + + + + + + + + + + + + + + + + + + + + + + + + AdditionalIncludeDirectories="..\Python"/> + AdditionalIncludeDirectories="..\Python"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + AdditionalIncludeDirectories="..\Python"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + RelativePath="..\Python\marshal.c"> + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - + + @@ -2512,921 +628,108 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + RelativePath="..\Python\structmember.c"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From python-checkins at python.org Sun Jan 15 10:34:14 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 15 Jan 2006 10:34:14 +0100 (CET) Subject: [Python-checkins] r42056 - python/trunk/PCbuild/BUILDno.txt python/trunk/PCbuild/pythoncore.vcproj Message-ID: <20060115093414.E22B31E4002@bag.python.org> Author: martin.v.loewis Date: Sun Jan 15 10:34:11 2006 New Revision: 42056 Removed: python/trunk/PCbuild/BUILDno.txt Modified: python/trunk/PCbuild/pythoncore.vcproj Log: Remove buildno. Deleted: /python/trunk/PCbuild/BUILDno.txt ============================================================================== --- /python/trunk/PCbuild/BUILDno.txt Sun Jan 15 10:34:11 2006 +++ (empty file) @@ -1,165 +0,0 @@ -Python has a "build number" scheme on Unix-like systems that's hard to -explain: - -Python 2.0b1 (#4, Sep 7 2000, 02:40:55) [MSC 32 bit (Intel)] on win32 - ^^ -The build number there is "#4". - -Each developer's unique build tree generates its own "build numbers", -starting at 0, and increasing by 1 each time a build is done in that tree. -These numbers are never checked in, or coordinated in any other way. It's -just handy for a developer to distinguish among their own personal builds. 
- -The makefile tricks used to accomplish this under Unix-like systems don't -work under MSDev. Here we fake it by hand, but much less frequently, and -do check it in. The build number only changes often enough to distinguish -releases from each other, and from the long "in between" stretches of CVS -development. An account of all Windows BUILD numbers follows; when you -check in a new one, please add an entry to the top of the list. - -How to change the Windows build number: - -+ Right-click on getbuildinfo.c from within MSDev. Select Settings ... -+ Select the General category of the C/C++ tab. -+ In "Settings For:" select "Multiple Configurations ...". -+ Check the "Win32 Release" and "Win32 Debug" boxes and click OK. -+ In the Preprocessor Definitions box, increment the number after BUILD=. -+ Click OK. -+ This is not enough to convince MSDev to recompile getbuildinfo.c, - so force that and relink. -+ Verify that the new build number shows up in both release and debug - builds. - - -Windows Python BUILD numbers ----------------------------- - 65 2.4.1 - 30-Mar-2005 - 64 2.4.1c2 - 17-Mar-2005 - 63 2.4.1c1 - 10-Mar-2005 - 62 2.3.5 - 08-Feb-2005 - 61 2.3.5c1 - 26-Jan-2005 - 60 2.4 - 29-Nov-2004 - 59 2.4.0c1 - 7-Nov-2004 - 58 2.4.0b2 - 2-Nov-2004 - 57 2.4.0b1 - 15-Oct-2004 - 56 2.4.0a3 - 2-Sep-2004 - 55 2.4.0a2 - 4-Aug-2004 - 54 2.4.0a1 - 8-Jul-2004 - 53 2.3.4 (final) - 27-May-2004 - 52 2.3.4c1 - 13-May-2004 - 51 2.3.3 (final) - 19-Dec-2003 - 50 2.3.3c1 - 5-Dec-2003 - 49 2.3.2 (final) - 3-Oct-2003 - 48 2.3.2c1 - 30-Sep-2003 - 47 2.3.1 (final) - 23-Sep-2003 - 46 2.3 (final) - 29-Jul-2003 - 45 2.3c2 - 24-Jul-2003 - 44 2.3c1 - 18-Jul-2003 - 43 2.3b2 - 29-Jun-2003 - 42 2.2.3 (final) - 30-May-2003 - 41 2.2.3c1 - 22-May-2003 - 40 2.3b1 - 25-Apr-2003 - 39 2.3a2 - 19-Feb-2003 - 38 2.3a1 - 31-Dec-2002 - 37 2.2.2 (final) - 14-Oct-2002 - 36 2.2.2b1 - 7-Oct-2002 - 35 2.1.3 (final) - 8-Apr-2002 - 34 2.2.1 (final) - 10-Apr-2002 - 33 2.2.1c2 - 26-Mar-2002 - 32 2.2.1c1 - 
18-Mar-2002 - 31 2.1.2 final - 16-Jan-2002 - 30 2.1.2c1 - 10-Jan-2002 - 29 CVS development - 21-Dec-2001 - 28 2.2 final - 21-Dec-2001 - 27 2.2c1 - 14-Dec-2001 - 26 2.2b2 - 16-Nov-2001 - 25 2.2b1 - 19-Oct-2001 - 24 2.2a4 - 28-Sep-2001 - 23 2.2a3 - 07-Sep-2001 - 22 2.2a2 - 22-Aug-2001 - 21 2.2a1 - 18-Jul-2001 - 20 2.1.1 - 20-Jul-2001 - 19 2.1.1c1 - 13-Jul-2001 - 18 2.0.1 - 22-Jun-2001 - 17 2.0.1c1 - 13-Jun-2001 - 16 CVS development - 18-Apr-2001 - 15 2.1 final - 16-Apr-2001 - 14 2.1c2 - 15-Apr-2001 - 13 2.1c1 - 12-Apr-2001 - 12 2.1b2 - 20-Mar-2001 - 11 2.1b1 - 28-Feb-2001 - 10 2.1a2 - 1-Feb-2001 - 9 2.1a1 - 17-Jan-2001 - 8 2.0 (final) - 14-Oct-2000 - 7 2.0c1 - 07-Oct-2000 - 6 2.0b2 - 26-Sep-2000 - 5 CVS development - 07-Sep-2000 - 4 2.0b1 repaired to include Lib\xml + Lib\lib-old + Lib\test\*.xml - 07-Sep-2000 - 3 2.0b1 - 05-Sep-2000 - 2 CVS development - 1 unused - 0 2.0b1p1 and 2.0b1p2 - 01-Sep-2000 for both -- this scheme hadn't started yet Modified: python/trunk/PCbuild/pythoncore.vcproj ============================================================================== --- python/trunk/PCbuild/pythoncore.vcproj (original) +++ python/trunk/PCbuild/pythoncore.vcproj Sun Jan 15 10:34:11 2006 @@ -451,18 +451,6 @@ Name="VCCLCompilerTool" PreprocessorDefinitions="BUILD=\"60\""/> - - - - - - From python-checkins at python.org Sun Jan 15 11:18:00 2006 From: python-checkins at python.org (fredrik.lundh) Date: Sun, 15 Jan 2006 11:18:00 +0100 (CET) Subject: [Python-checkins] r42057 - python/trunk/Doc/lib/libmmap.tex Message-ID: <20060115101800.7AAC61E4002@bag.python.org> Author: fredrik.lundh Date: Sun Jan 15 11:17:59 2006 New Revision: 42057 Modified: python/trunk/Doc/lib/libmmap.tex Log: typo Modified: python/trunk/Doc/lib/libmmap.tex ============================================================================== --- python/trunk/Doc/lib/libmmap.tex (original) +++ python/trunk/Doc/lib/libmmap.tex Sun Jan 15 11:17:59 2006 @@ -164,6 +164,6 @@ 
\begin{methoddesc}{write_byte}{\var{byte}} Write the single-character string \var{byte} into memory at the current position of the file pointer; the file position is advanced - by \code{1}.If the mmap was created with \constant{ACCESS_READ}, + by \code{1}. If the mmap was created with \constant{ACCESS_READ}, then writing to it will throw a \exception{TypeError} exception. \end{methoddesc} From python-checkins at python.org Sun Jan 15 15:59:57 2006 From: python-checkins at python.org (fredrik.lundh) Date: Sun, 15 Jan 2006 15:59:57 +0100 (CET) Subject: [Python-checkins] r42058 - python/trunk/Doc/lib/libzipimport.tex Message-ID: <20060115145957.251FE1E4002@bag.python.org> Author: fredrik.lundh Date: Sun Jan 15 15:59:55 2006 New Revision: 42058 Modified: python/trunk/Doc/lib/libzipimport.tex Log: the implementation uses ZipImportError, not ZipImporterError... Modified: python/trunk/Doc/lib/libzipimport.tex ============================================================================== --- python/trunk/Doc/lib/libzipimport.tex (original) +++ python/trunk/Doc/lib/libzipimport.tex Sun Jan 15 15:59:55 2006 @@ -36,7 +36,7 @@ The available attributes of this module are: -\begin{excdesc}{ZipImporterError} +\begin{excdesc}{ZipImportError} Exception raised by zipimporter objects. It's a subclass of \exception{ImportError}, so it can be caught as \exception{ImportError}, too. From python-checkins at python.org Sun Jan 15 16:00:41 2006 From: python-checkins at python.org (fredrik.lundh) Date: Sun, 15 Jan 2006 16:00:41 +0100 (CET) Subject: [Python-checkins] r42059 - python/trunk/Modules/zipimport.c Message-ID: <20060115150041.54BD71E4002@bag.python.org> Author: fredrik.lundh Date: Sun Jan 15 16:00:40 2006 New Revision: 42059 Modified: python/trunk/Modules/zipimport.c Log: the implementation uses ZipImportError, not ZipImporterError... 
Modified: python/trunk/Modules/zipimport.c ============================================================================== --- python/trunk/Modules/zipimport.c (original) +++ python/trunk/Modules/zipimport.c Sun Jan 15 16:00:40 2006 @@ -1132,7 +1132,7 @@ \n\ This module exports three objects:\n\ - zipimporter: a class; its constructor takes a path to a Zip archive.\n\ -- ZipImporterError: exception raised by zipimporter objects. It's a\n\ +- ZipImportError: exception raised by zipimporter objects. It's a\n\ subclass of ImportError, so it can be caught as ImportError, too.\n\ - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\ info dicts, as used in zipimporter._files.\n\ From python-checkins at python.org Sun Jan 15 17:11:32 2006 From: python-checkins at python.org (andrew.kuchling) Date: Sun, 15 Jan 2006 17:11:32 +0100 (CET) Subject: [Python-checkins] r42060 - python/trunk/Doc/whatsnew/whatsnew25.tex Message-ID: <20060115161132.89EC61E4013@bag.python.org> Author: andrew.kuchling Date: Sun Jan 15 17:11:28 2006 New Revision: 42060 Modified: python/trunk/Doc/whatsnew/whatsnew25.tex Log: Add recent item Modified: python/trunk/Doc/whatsnew/whatsnew25.tex ============================================================================== --- python/trunk/Doc/whatsnew/whatsnew25.tex (original) +++ python/trunk/Doc/whatsnew/whatsnew25.tex Sun Jan 15 17:11:28 2006 @@ -406,6 +406,14 @@ The \member{st_flags} member is also available, if the platform supports it. % XXX patch 1180695, 1212117 +\item The \module{socket} module now supports \constant{AF_NETLINK} +sockets on Linux, thanks to a patch from Philippe Biondi. +Netlink sockets are a Linux-specific mechanism for communications +between a user-space process and kernel code; an introductory +article about them is at \url{http://www.linuxjournal.com/article/7356}. +In Python code, netlink addresses are represented as a tuple of 2 integers, +\code{(\var{pid}, \var{group_mask})}. 
+ \item New module: \module{spwd} provides functions for accessing the shadow password database on systems that support it. % XXX give example From python-checkins at python.org Mon Jan 16 02:51:48 2006 From: python-checkins at python.org (barry.warsaw) Date: Mon, 16 Jan 2006 02:51:48 +0100 (CET) Subject: [Python-checkins] r42061 - sandbox/trunk/emailpkg/2.5 sandbox/trunk/emailpkg/2.5/testall.py Message-ID: <20060116015148.C69E11E4009@bag.python.org> Author: barry.warsaw Date: Mon Jan 16 02:51:48 2006 New Revision: 42061 Modified: sandbox/trunk/emailpkg/2.5/ (props changed) sandbox/trunk/emailpkg/2.5/testall.py Log: Extern in the email package from Python 2.3 for the email 2.5 release. Also, simplify the testall.py script. Modified: sandbox/trunk/emailpkg/2.5/testall.py ============================================================================== --- sandbox/trunk/emailpkg/2.5/testall.py (original) +++ sandbox/trunk/emailpkg/2.5/testall.py Mon Jan 16 02:51:48 2006 @@ -26,15 +26,9 @@ # See if we have the Japanese codecs package installed try: - # Python 2.4 - unicode('foo', 'euc-jp') -except LookupError: - try: - unicode('foo', 'japanese.iso-2022-jp') - except LookupError: - test_email_codecs = None -else: from email.test import test_email_codecs +except TestSkipped: + test_email_codecs = None From python-checkins at python.org Mon Jan 16 05:31:40 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 16 Jan 2006 05:31:40 +0100 (CET) Subject: [Python-checkins] r42062 - python/trunk/Modules/socketmodule.c python/trunk/Modules/socketmodule.h Message-ID: <20060116043140.B01861E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 16 05:31:40 2006 New Revision: 42062 Modified: python/trunk/Modules/socketmodule.c python/trunk/Modules/socketmodule.h Log: Get socketmodule to build after adding netlink support. 
Modified: python/trunk/Modules/socketmodule.c ============================================================================== --- python/trunk/Modules/socketmodule.c (original) +++ python/trunk/Modules/socketmodule.c Mon Jan 16 05:31:40 2006 @@ -3993,7 +3993,9 @@ PyModule_AddIntConstant(m, "NETLINK_FIREWALL", NETLINK_FIREWALL); PyModule_AddIntConstant(m, "NETLINK_TCPDIAG", NETLINK_TCPDIAG); PyModule_AddIntConstant(m, "NETLINK_NFLOG", NETLINK_NFLOG); +#ifdef NETLINK_XFRM PyModule_AddIntConstant(m, "NETLINK_XFRM", NETLINK_XFRM); +#endif PyModule_AddIntConstant(m, "NETLINK_ARPD", NETLINK_ARPD); PyModule_AddIntConstant(m, "NETLINK_ROUTE6", NETLINK_ROUTE6); PyModule_AddIntConstant(m, "NETLINK_IP6_FW", NETLINK_IP6_FW); Modified: python/trunk/Modules/socketmodule.h ============================================================================== --- python/trunk/Modules/socketmodule.h (original) +++ python/trunk/Modules/socketmodule.h Mon Jan 16 05:31:40 2006 @@ -33,6 +33,9 @@ #endif #ifdef HAVE_LINUX_NETLINK_H +# ifdef HAVE_ASM_TYPES_H +# include <asm/types.h> +# endif # include <linux/netlink.h> #else # undef AF_NETLINK From python-checkins at python.org Mon Jan 16 05:37:22 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 16 Jan 2006 05:37:22 +0100 (CET) Subject: [Python-checkins] r42063 - python/trunk/Misc/build.sh Message-ID: <20060116043722.47B351E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 16 05:37:22 2006 New Revision: 42063 Modified: python/trunk/Misc/build.sh Log: refresh every 12 hours for crazy people like me that keep the page up Modified: python/trunk/Misc/build.sh ============================================================================== --- python/trunk/Misc/build.sh (original) +++ python/trunk/Misc/build.sh Mon Jan 16 05:37:22 2006 @@ -82,7 +82,11 @@ ## create results file TITLE="Automated Python Build Results" -echo "$TITLE" >> $RESULT_FILE +echo "" >> $RESULT_FILE +echo " " >> $RESULT_FILE +echo " $TITLE" >> $RESULT_FILE +echo " " >> $RESULT_FILE +echo " "
>> $RESULT_FILE echo "" >> $RESULT_FILE echo "

    Automated Python Build Results

    " >> $RESULT_FILE echo "" >> $RESULT_FILE From python-checkins at python.org Mon Jan 16 10:08:10 2006 From: python-checkins at python.org (vinay.sajip) Date: Mon, 16 Jan 2006 10:08:10 +0100 (CET) Subject: [Python-checkins] r42064 - python/trunk/Lib/logging/handlers.py Message-ID: <20060116090810.7D2581E4002@bag.python.org> Author: vinay.sajip Date: Mon Jan 16 10:08:06 2006 New Revision: 42064 Modified: python/trunk/Lib/logging/handlers.py Log: Fixed bug in time-to-midnight calculation. Modified: python/trunk/Lib/logging/handlers.py ============================================================================== --- python/trunk/Lib/logging/handlers.py (original) +++ python/trunk/Lib/logging/handlers.py Mon Jan 16 10:08:06 2006 @@ -212,9 +212,12 @@ currentMinute = t[4] currentSecond = t[5] # r is the number of seconds left between now and midnight - r = (24 - currentHour) * 60 * 60 # number of hours in seconds - r = r + (59 - currentMinute) * 60 # plus the number of minutes (in secs) - r = r + (59 - currentSecond) # plus the number of seconds + if (currentMinute == 0) and (currentSecond == 0): + r = (24 - currentHour) * 60 * 60 # number of hours in seconds + else: + r = (23 - currentHour) * 60 * 60 + r = r + (59 - currentMinute) * 60 # plus the number of minutes (in secs) + r = r + (60 - currentSecond) # plus the number of seconds self.rolloverAt = currentTime + r # If we are rolling over on a certain day, add in the number of days until # the next rollover, but offset by 1 since we just calculated the time From python-checkins at python.org Mon Jan 16 10:10:07 2006 From: python-checkins at python.org (vinay.sajip) Date: Mon, 16 Jan 2006 10:10:07 +0100 (CET) Subject: [Python-checkins] r42065 - python/branches/release24-maint/Lib/logging/handlers.py Message-ID: <20060116091007.3A8961E4014@bag.python.org> Author: vinay.sajip Date: Mon Jan 16 10:10:05 2006 New Revision: 42065 Modified: python/branches/release24-maint/Lib/logging/handlers.py Log: Fixed bug in 
time-to-midnight calculation. Modified: python/branches/release24-maint/Lib/logging/handlers.py ============================================================================== --- python/branches/release24-maint/Lib/logging/handlers.py (original) +++ python/branches/release24-maint/Lib/logging/handlers.py Mon Jan 16 10:10:05 2006 @@ -212,9 +212,12 @@ currentMinute = t[4] currentSecond = t[5] # r is the number of seconds left between now and midnight - r = (24 - currentHour) * 60 * 60 # number of hours in seconds - r = r + (59 - currentMinute) * 60 # plus the number of minutes (in secs) - r = r + (59 - currentSecond) # plus the number of seconds + if (currentMinute == 0) and (currentSecond == 0): + r = (24 - currentHour) * 60 * 60 # number of hours in seconds + else: + r = (23 - currentHour) * 60 * 60 + r = r + (59 - currentMinute) * 60 # plus the number of minutes (in secs) + r = r + (60 - currentSecond) # plus the number of seconds self.rolloverAt = currentTime + r # If we are rolling over on a certain day, add in the number of days until # the next rollover, but offset by 1 since we just calculated the time From python-checkins at python.org Mon Jan 16 10:14:00 2006 From: python-checkins at python.org (vinay.sajip) Date: Mon, 16 Jan 2006 10:14:00 +0100 (CET) Subject: [Python-checkins] r42066 - python/trunk/Lib/logging/handlers.py Message-ID: <20060116091400.8C43F1E4002@bag.python.org> Author: vinay.sajip Date: Mon Jan 16 10:13:58 2006 New Revision: 42066 Modified: python/trunk/Lib/logging/handlers.py Log: TimedRotatingFileHandler now calculates next rollover from previous rollover rather than current time. 
Modified: python/trunk/Lib/logging/handlers.py ============================================================================== --- python/trunk/Lib/logging/handlers.py (original) +++ python/trunk/Lib/logging/handlers.py Mon Jan 16 10:13:58 2006 @@ -282,7 +282,7 @@ self.stream = codecs.open(self.baseFilename, 'w', self.encoding) else: self.stream = open(self.baseFilename, 'w') - self.rolloverAt = int(time.time()) + self.interval + self.rolloverAt = self.rolloverAt + self.interval class SocketHandler(logging.Handler): """ From python-checkins at python.org Mon Jan 16 10:14:48 2006 From: python-checkins at python.org (vinay.sajip) Date: Mon, 16 Jan 2006 10:14:48 +0100 (CET) Subject: [Python-checkins] r42067 - python/branches/release24-maint/Lib/logging/handlers.py Message-ID: <20060116091448.1407F1E4002@bag.python.org> Author: vinay.sajip Date: Mon Jan 16 10:14:47 2006 New Revision: 42067 Modified: python/branches/release24-maint/Lib/logging/handlers.py Log: TimedRotatingFileHandler now calculates next rollover from previous rollover rather than current time. 
Modified: python/branches/release24-maint/Lib/logging/handlers.py ============================================================================== --- python/branches/release24-maint/Lib/logging/handlers.py (original) +++ python/branches/release24-maint/Lib/logging/handlers.py Mon Jan 16 10:14:47 2006 @@ -282,7 +282,7 @@ self.stream = codecs.open(self.baseFilename, 'w', self.encoding) else: self.stream = open(self.baseFilename, 'w') - self.rolloverAt = int(time.time()) + self.interval + self.rolloverAt = self.rolloverAt + self.interval class SocketHandler(logging.Handler): """ From python-checkins at python.org Mon Jan 16 10:27:13 2006 From: python-checkins at python.org (vinay.sajip) Date: Mon, 16 Jan 2006 10:27:13 +0100 (CET) Subject: [Python-checkins] r42068 - python/trunk/Lib/logging/handlers.py Message-ID: <20060116092713.CD5AD1E40C1@bag.python.org> Author: vinay.sajip Date: Mon Jan 16 10:27:10 2006 New Revision: 42068 Modified: python/trunk/Lib/logging/handlers.py Log: Exceptions raised during renaming in rotating file handlers are now passed to handleError (except for SystemExit and KeyboardInterrupt, which are re-raised). Modified: python/trunk/Lib/logging/handlers.py ============================================================================== --- python/trunk/Lib/logging/handlers.py (original) +++ python/trunk/Lib/logging/handlers.py Mon Jan 16 10:27:10 2006 @@ -126,7 +126,12 @@ dfn = self.baseFilename + ".1" if os.path.exists(dfn): os.remove(dfn) - os.rename(self.baseFilename, dfn) + try: + os.rename(self.baseFilename, dfn) + except (KeyboardInterrupt, SystemExit): + raise + except: + pass #print "%s -> %s" % (self.baseFilename, dfn) if self.encoding: self.stream = codecs.open(self.baseFilename, 'w', self.encoding) @@ -270,7 +275,12 @@ dfn = self.baseFilename + "." 
+ time.strftime(self.suffix, timeTuple) if os.path.exists(dfn): os.remove(dfn) - os.rename(self.baseFilename, dfn) + try: + os.rename(self.baseFilename, dfn) + except (KeyboardInterrupt, SystemExit): + raise + except: + pass if self.backupCount > 0: # find the oldest log file and delete it s = glob.glob(self.baseFilename + ".20*") From python-checkins at python.org Mon Jan 16 10:27:59 2006 From: python-checkins at python.org (vinay.sajip) Date: Mon, 16 Jan 2006 10:27:59 +0100 (CET) Subject: [Python-checkins] r42069 - python/trunk/Lib/logging/handlers.py Message-ID: <20060116092759.52D501E4002@bag.python.org> Author: vinay.sajip Date: Mon Jan 16 10:27:58 2006 New Revision: 42069 Modified: python/trunk/Lib/logging/handlers.py Log: Exceptions raised during renaming in rotating file handlers are now passed to handleError (except for SystemExit and KeyboardInterrupt, which are re-raised). Modified: python/trunk/Lib/logging/handlers.py ============================================================================== --- python/trunk/Lib/logging/handlers.py (original) +++ python/trunk/Lib/logging/handlers.py Mon Jan 16 10:27:58 2006 @@ -131,7 +131,7 @@ except (KeyboardInterrupt, SystemExit): raise except: - pass + self.handleError(record) #print "%s -> %s" % (self.baseFilename, dfn) if self.encoding: self.stream = codecs.open(self.baseFilename, 'w', self.encoding) @@ -280,7 +280,7 @@ except (KeyboardInterrupt, SystemExit): raise except: - pass + self.handleError(record) if self.backupCount > 0: # find the oldest log file and delete it s = glob.glob(self.baseFilename + ".20*") From python-checkins at python.org Mon Jan 16 10:29:00 2006 From: python-checkins at python.org (vinay.sajip) Date: Mon, 16 Jan 2006 10:29:00 +0100 (CET) Subject: [Python-checkins] r42070 - python/branches/release24-maint/Lib/logging/handlers.py Message-ID: <20060116092900.9D7AC1E4002@bag.python.org> Author: vinay.sajip Date: Mon Jan 16 10:28:59 2006 New Revision: 42070 Modified: 
python/branches/release24-maint/Lib/logging/handlers.py Log: Exceptions raised during renaming in rotating file handlers are now passed to handleError (except for SystemExit and KeyboardInterrupt, which are re-raised). Modified: python/branches/release24-maint/Lib/logging/handlers.py ============================================================================== --- python/branches/release24-maint/Lib/logging/handlers.py (original) +++ python/branches/release24-maint/Lib/logging/handlers.py Mon Jan 16 10:28:59 2006 @@ -126,7 +126,12 @@ dfn = self.baseFilename + ".1" if os.path.exists(dfn): os.remove(dfn) - os.rename(self.baseFilename, dfn) + try: + os.rename(self.baseFilename, dfn) + except (KeyboardInterrupt, SystemExit): + raise + except: + self.handleError(record) #print "%s -> %s" % (self.baseFilename, dfn) if self.encoding: self.stream = codecs.open(self.baseFilename, 'w', self.encoding) @@ -270,7 +275,12 @@ dfn = self.baseFilename + "." + time.strftime(self.suffix, timeTuple) if os.path.exists(dfn): os.remove(dfn) - os.rename(self.baseFilename, dfn) + try: + os.rename(self.baseFilename, dfn) + except (KeyboardInterrupt, SystemExit): + raise + except: + self.handleError(record) if self.backupCount > 0: # find the oldest log file and delete it s = glob.glob(self.baseFilename + ".20*") From python-checkins at python.org Mon Jan 16 15:54:11 2006 From: python-checkins at python.org (barry.warsaw) Date: Mon, 16 Jan 2006 15:54:11 +0100 (CET) Subject: [Python-checkins] r42071 - sandbox/trunk/emailpkg/3.0 sandbox/trunk/emailpkg/3.0/MANIFEST sandbox/trunk/emailpkg/3.0/NEWS sandbox/trunk/emailpkg/3.0/README sandbox/trunk/emailpkg/3.0/setup.py sandbox/trunk/emailpkg/3.0/testall.py Message-ID: <20060116145411.64D401E4009@bag.python.org> Author: barry.warsaw Date: Mon Jan 16 15:54:10 2006 New Revision: 42071 Added: sandbox/trunk/emailpkg/3.0/MANIFEST - copied unchanged from r41750, sandbox/trunk/emailpkg/2.5/MANIFEST sandbox/trunk/emailpkg/3.0/NEWS - copied unchanged from 
r41750, sandbox/trunk/emailpkg/2.5/NEWS sandbox/trunk/emailpkg/3.0/README - copied unchanged from r41750, sandbox/trunk/emailpkg/2.5/README sandbox/trunk/emailpkg/3.0/setup.py - copied, changed from r41750, sandbox/trunk/emailpkg/2.5/setup.py sandbox/trunk/emailpkg/3.0/testall.py - copied, changed from r42061, sandbox/trunk/emailpkg/2.5/testall.py Modified: sandbox/trunk/emailpkg/3.0/ (props changed) Log: Set up separate distutils package infrastructure for email 3.0 package. Copied: sandbox/trunk/emailpkg/3.0/setup.py (from r41750, sandbox/trunk/emailpkg/2.5/setup.py) ============================================================================== --- sandbox/trunk/emailpkg/2.5/setup.py (original) +++ sandbox/trunk/emailpkg/3.0/setup.py Mon Jan 16 15:54:10 2006 @@ -1,6 +1,6 @@ #! /usr/bin/env python # -# Copyright (C) 2001-2005 Python Software Foundation +# Copyright (C) 2001-2006 Python Software Foundation # Standard distutils setup.py install script for the `mimelib' library, a next # generation MIME library for Python. To install into your existing Python Copied: sandbox/trunk/emailpkg/3.0/testall.py (from r42061, sandbox/trunk/emailpkg/2.5/testall.py) ============================================================================== --- sandbox/trunk/emailpkg/2.5/testall.py (original) +++ sandbox/trunk/emailpkg/3.0/testall.py Mon Jan 16 15:54:10 2006 @@ -1,4 +1,4 @@ -# Copyright (C) 2002-2004 Python Software Foundation +# Copyright (C) 2002-2006 Python Software Foundation """A simple test runner, which sets up sys.path properly. From skip at pobox.com Mon Jan 16 16:46:09 2006 From: skip at pobox.com (skip at pobox.com) Date: Mon, 16 Jan 2006 09:46:09 -0600 Subject: [Python-checkins] r42064 - python/trunk/Lib/logging/handlers.py In-Reply-To: <20060116090810.7D2581E4002@bag.python.org> References: <20060116090810.7D2581E4002@bag.python.org> Message-ID: <17355.49089.866569.489467@montanaro.dyndns.org> vinay> Log: vinay> Fixed bug in time-to-midnight calculation. 
Is there some reason not to use datetime to do this? Skip From tim.peters at gmail.com Mon Jan 16 17:09:20 2006 From: tim.peters at gmail.com (Tim Peters) Date: Mon, 16 Jan 2006 11:09:20 -0500 Subject: [Python-checkins] r42064 - python/trunk/Lib/logging/handlers.py In-Reply-To: <17355.49089.866569.489467@montanaro.dyndns.org> References: <20060116090810.7D2581E4002@bag.python.org> <17355.49089.866569.489467@montanaro.dyndns.org> Message-ID: <1f7befae0601160809n7c7888bg2534402750d03cf6@mail.gmail.com> [vinay] >> Log: >> Fixed bug in time-to-midnight calculation. [Skip] > Is there some reason not to use datetime to do this? PEP 291 says logging intends to be compatible with Python 1.5.2, which leaves datetime out. Vinay, rather than: if (currentMinute == 0) and (currentSecond == 0): r = (24 - currentHour) * 60 * 60 # number of hours in seconds else: r = (23 - currentHour) * 60 * 60 r = r + (59 - currentMinute) * 60 # plus the number of minutes (in secs) r = r + (60 - currentSecond) # plus the number of seconds for clarity I suggest this instead: # A module-level constant _MIDNIGHT = 24 * 60 * 60 # number of seconds in a day r = _MIDNIGHT - ((currentHour * 60 + currentMinute) * 60 + currentSecond) That's "obviously correct" instead of "huh?" . From fredrik at pythonware.com Mon Jan 16 17:11:39 2006 From: fredrik at pythonware.com (Fredrik Lundh) Date: Mon, 16 Jan 2006 17:11:39 +0100 Subject: [Python-checkins] r42064 - python/trunk/Lib/logging/handlers.py References: <20060116090810.7D2581E4002@bag.python.org> <17355.49089.866569.489467@montanaro.dyndns.org> Message-ID: skip at pobox.com wrote: > vinay> Fixed bug in time-to-midnight calculation. > > Is there some reason not to use datetime to do this?
http://www.python.org/peps/pep-0291.html From python-checkins at python.org Mon Jan 16 22:24:42 2006 From: python-checkins at python.org (vinay.sajip) Date: Mon, 16 Jan 2006 22:24:42 +0100 (CET) Subject: [Python-checkins] r42072 - python/trunk/Lib/test/test_logging.py Message-ID: <20060116212442.2D8291E4002@bag.python.org> Author: vinay.sajip Date: Mon Jan 16 22:24:38 2006 New Revision: 42072 Modified: python/trunk/Lib/test/test_logging.py Log: Added test for fileConfig. Contributed by Shane Hathaway. Modified: python/trunk/Lib/test/test_logging.py ============================================================================== --- python/trunk/Lib/test/test_logging.py (original) +++ python/trunk/Lib/test/test_logging.py Mon Jan 16 22:24:38 2006 @@ -26,7 +26,7 @@ import select import os, sys, string, struct, types, cPickle, cStringIO -import socket, threading, time +import socket, tempfile, threading, time import logging, logging.handlers, logging.config BANNER = "-- %-10s %-6s ---------------------------------------------------\n" @@ -393,6 +393,102 @@ hand.removeFilter(filt) #---------------------------------------------------------------------------- +# Test 4 +#---------------------------------------------------------------------------- + +# config0 is a standard configuratin. +config0 = """ +[loggers] +keys=root + +[handlers] +keys=hand1 + +[formatters] +keys=form1 + +[logger_root] +level=NOTSET +handlers=hand1 + +[handler_hand1] +class=StreamHandler +level=NOTSET +formatter=form1 +args=(sys.stdout,) + +[formatter_form1] +format=%(levelname)s:%(name)s:%(message)s +datefmt= +""" + +# config1 adds a little to the standard configuration. 
+config1 = """ +[loggers] +keys=root,parser + +[handlers] +keys=hand1 + +[formatters] +keys=form1 + +[logger_root] +level=NOTSET +handlers=hand1 + +[logger_parser] +level=DEBUG +handlers=hand1 +propagate=1 +qualname=compiler.parser + +[handler_hand1] +class=StreamHandler +level=NOTSET +formatter=form1 +args=(sys.stdout,) + +[formatter_form1] +format=%(levelname)s:%(name)s:%(message)s +datefmt= +""" + +# config2 has a subtle configuration error that should be reported +config2 = string.replace(config1, "sys.stdout", "sys.stbout") + +# config3 has a less subtle configuration error +config3 = string.replace( + config1, "formatter=form1", "formatter=misspelled_name") + +def test4(): + for i in range(4): + conf = globals()['config%d' % i] + sys.stdout.write('config%d: ' % i) + loggerDict = logging.getLogger().manager.loggerDict + saved_handlers = logging._handlers.copy() + saved_loggers = loggerDict.copy() + try: + fn = tempfile.mktemp(".ini") + f = open(fn, "w") + f.write(conf) + f.close() + try: + logging.config.fileConfig(fn) + except: + t = sys.exc_info()[0] + message(str(t)) + else: + message('ok.') + os.remove(fn) + finally: + logging._handlers.clear() + logging._handlers.update(saved_handlers) + loggerDict = logging.getLogger().manager.loggerDict + loggerDict.clear() + loggerDict.update(saved_loggers) + +#---------------------------------------------------------------------------- # Test Harness #---------------------------------------------------------------------------- def banner(nm, typ): @@ -456,6 +552,10 @@ test3() banner("log_test3", "end") + banner("log_test4", "begin") + test4() + banner("log_test4", "end") + finally: #wait for TCP receiver to terminate socketDataProcessed.wait() From python-checkins at python.org Mon Jan 16 22:25:29 2006 From: python-checkins at python.org (vinay.sajip) Date: Mon, 16 Jan 2006 22:25:29 +0100 (CET) Subject: [Python-checkins] r42073 - python/trunk/Lib/test/output/test_logging Message-ID: 
<20060116212529.087771E401C@bag.python.org> Author: vinay.sajip Date: Mon Jan 16 22:25:28 2006 New Revision: 42073 Modified: python/trunk/Lib/test/output/test_logging Log: Changes due to added test for fileConfig contributed by Shane Hathaway. Modified: python/trunk/Lib/test/output/test_logging ============================================================================== --- python/trunk/Lib/test/output/test_logging (original) +++ python/trunk/Lib/test/output/test_logging Mon Jan 16 22:25:28 2006 @@ -485,6 +485,12 @@ INFO:a.b.c:Info 4 INFO:a.b.c.d:Info 5 -- log_test3 end --------------------------------------------------- +-- log_test4 begin --------------------------------------------------- +config0: ok. +config1: ok. +config2: exceptions.AttributeError +config3: exceptions.KeyError +-- log_test4 end --------------------------------------------------- -- logrecv output begin --------------------------------------------------- ERR -> CRITICAL: Message 0 (via logrecv.tcp.ERR) ERR -> ERROR: Message 1 (via logrecv.tcp.ERR) From python-checkins at python.org Mon Jan 16 22:28:38 2006 From: python-checkins at python.org (vinay.sajip) Date: Mon, 16 Jan 2006 22:28:38 +0100 (CET) Subject: [Python-checkins] r42074 - python/trunk/Lib/logging/config.py Message-ID: <20060116212838.3EF8D1E4007@bag.python.org> Author: vinay.sajip Date: Mon Jan 16 22:28:37 2006 New Revision: 42074 Modified: python/trunk/Lib/logging/config.py Log: Refactoring for fileConfig. Contributed by Shane Hathaway. Modified: python/trunk/Lib/logging/config.py ============================================================================== --- python/trunk/Lib/logging/config.py (original) +++ python/trunk/Lib/logging/config.py Mon Jan 16 22:28:37 2006 @@ -72,135 +72,147 @@ cp.readfp(fname) else: cp.read(fname) - #first, do the formatters... 
- flist = cp.get("formatters", "keys") - if len(flist): - flist = string.split(flist, ",") - formatters = {} - for form in flist: - sectname = "formatter_%s" % form - opts = cp.options(sectname) - if "format" in opts: - fs = cp.get(sectname, "format", 1) - else: - fs = None - if "datefmt" in opts: - dfs = cp.get(sectname, "datefmt", 1) - else: - dfs = None - f = logging.Formatter(fs, dfs) - formatters[form] = f - #next, do the handlers... - #critical section... + + formatters = _create_formatters(cp) + + # critical section logging._acquireLock() try: - try: - #first, lose the existing handlers... - logging._handlers.clear() - #now set up the new ones... - hlist = cp.get("handlers", "keys") - if len(hlist): - hlist = string.split(hlist, ",") - handlers = {} - fixups = [] #for inter-handler references - for hand in hlist: - try: - sectname = "handler_%s" % hand - klass = cp.get(sectname, "class") - opts = cp.options(sectname) - if "formatter" in opts: - fmt = cp.get(sectname, "formatter") - else: - fmt = "" - klass = eval(klass, vars(logging)) - args = cp.get(sectname, "args") - args = eval(args, vars(logging)) - h = apply(klass, args) - if "level" in opts: - level = cp.get(sectname, "level") - h.setLevel(logging._levelNames[level]) - if len(fmt): - h.setFormatter(formatters[fmt]) - #temporary hack for FileHandler and MemoryHandler. - if klass == logging.handlers.MemoryHandler: - if "target" in opts: - target = cp.get(sectname,"target") - else: - target = "" - if len(target): #the target handler may not be loaded yet, so keep for later... - fixups.append((h, target)) - handlers[hand] = h - except: #if an error occurs when instantiating a handler, too bad - pass #this could happen e.g. because of lack of privileges - #now all handlers are loaded, fixup inter-handler references... - for fixup in fixups: - h = fixup[0] - t = fixup[1] - h.setTarget(handlers[t]) - #at last, the loggers...first the root... 
- llist = cp.get("loggers", "keys") - llist = string.split(llist, ",") - llist.remove("root") - sectname = "logger_root" - root = logging.root - log = root - opts = cp.options(sectname) - if "level" in opts: - level = cp.get(sectname, "level") - log.setLevel(logging._levelNames[level]) - for h in root.handlers[:]: - root.removeHandler(h) - hlist = cp.get(sectname, "handlers") - if len(hlist): - hlist = string.split(hlist, ",") - for hand in hlist: - log.addHandler(handlers[hand]) - #and now the others... - #we don't want to lose the existing loggers, - #since other threads may have pointers to them. - #existing is set to contain all existing loggers, - #and as we go through the new configuration we - #remove any which are configured. At the end, - #what's left in existing is the set of loggers - #which were in the previous configuration but - #which are not in the new configuration. - existing = root.manager.loggerDict.keys() - #now set up the new ones... - for log in llist: - sectname = "logger_%s" % log - qn = cp.get(sectname, "qualname") - opts = cp.options(sectname) - if "propagate" in opts: - propagate = cp.getint(sectname, "propagate") - else: - propagate = 1 - logger = logging.getLogger(qn) - if qn in existing: - existing.remove(qn) - if "level" in opts: - level = cp.get(sectname, "level") - logger.setLevel(logging._levelNames[level]) - for h in logger.handlers[:]: - logger.removeHandler(h) - logger.propagate = propagate - logger.disabled = 0 - hlist = cp.get(sectname, "handlers") - if len(hlist): - hlist = string.split(hlist, ",") - for hand in hlist: - logger.addHandler(handlers[hand]) - #Disable any old loggers. There's no point deleting - #them as other threads may continue to hold references - #and by disabling them, you stop them doing any logging. 
- for log in existing: - root.manager.loggerDict[log].disabled = 1 - except: - ei = sys.exc_info() - traceback.print_exception(ei[0], ei[1], ei[2], None, sys.stderr) - del ei + logging._handlers.clear() + # Handlers add themselves to logging._handlers + handlers = _install_handlers(cp, formatters) + _install_loggers(cp, handlers) finally: logging._releaseLock() + +def _create_formatters(cp): + """Create and return formatters""" + flist = cp.get("formatters", "keys") + if not len(flist): + return {} + flist = string.split(flist, ",") + formatters = {} + for form in flist: + sectname = "formatter_%s" % form + opts = cp.options(sectname) + if "format" in opts: + fs = cp.get(sectname, "format", 1) + else: + fs = None + if "datefmt" in opts: + dfs = cp.get(sectname, "datefmt", 1) + else: + dfs = None + f = logging.Formatter(fs, dfs) + formatters[form] = f + return formatters + + +def _install_handlers(cp, formatters): + """Install and return handlers""" + hlist = cp.get("handlers", "keys") + if not len(hlist): + return {} + hlist = string.split(hlist, ",") + handlers = {} + fixups = [] #for inter-handler references + for hand in hlist: + sectname = "handler_%s" % hand + klass = cp.get(sectname, "class") + opts = cp.options(sectname) + if "formatter" in opts: + fmt = cp.get(sectname, "formatter") + else: + fmt = "" + klass = eval(klass, vars(logging)) + args = cp.get(sectname, "args") + args = eval(args, vars(logging)) + h = apply(klass, args) + if "level" in opts: + level = cp.get(sectname, "level") + h.setLevel(logging._levelNames[level]) + if len(fmt): + h.setFormatter(formatters[fmt]) + #temporary hack for FileHandler and MemoryHandler. + if klass == logging.handlers.MemoryHandler: + if "target" in opts: + target = cp.get(sectname,"target") + else: + target = "" + if len(target): #the target handler may not be loaded yet, so keep for later... + fixups.append((h, target)) + handlers[hand] = h + #now all handlers are loaded, fixup inter-handler references... 
+ for h, t in fixups: + h.setTarget(handlers[t]) + return handlers + + +def _install_loggers(cp, handlers): + """Create and install loggers""" + + # configure the root first + llist = cp.get("loggers", "keys") + llist = string.split(llist, ",") + llist.remove("root") + sectname = "logger_root" + root = logging.root + log = root + opts = cp.options(sectname) + if "level" in opts: + level = cp.get(sectname, "level") + log.setLevel(logging._levelNames[level]) + for h in root.handlers[:]: + root.removeHandler(h) + hlist = cp.get(sectname, "handlers") + if len(hlist): + hlist = string.split(hlist, ",") + for hand in hlist: + log.addHandler(handlers[hand]) + + #and now the others... + #we don't want to lose the existing loggers, + #since other threads may have pointers to them. + #existing is set to contain all existing loggers, + #and as we go through the new configuration we + #remove any which are configured. At the end, + #what's left in existing is the set of loggers + #which were in the previous configuration but + #which are not in the new configuration. + existing = root.manager.loggerDict.keys() + #now set up the new ones... + for log in llist: + sectname = "logger_%s" % log + qn = cp.get(sectname, "qualname") + opts = cp.options(sectname) + if "propagate" in opts: + propagate = cp.getint(sectname, "propagate") + else: + propagate = 1 + logger = logging.getLogger(qn) + if qn in existing: + existing.remove(qn) + if "level" in opts: + level = cp.get(sectname, "level") + logger.setLevel(logging._levelNames[level]) + for h in logger.handlers[:]: + logger.removeHandler(h) + logger.propagate = propagate + logger.disabled = 0 + hlist = cp.get(sectname, "handlers") + if len(hlist): + hlist = string.split(hlist, ",") + for hand in hlist: + logger.addHandler(handlers[hand]) + + #Disable any old loggers. There's no point deleting + #them as other threads may continue to hold references + #and by disabling them, you stop them doing any logging. 
+ for log in existing: + root.manager.loggerDict[log].disabled = 1 + + def listen(port=DEFAULT_LOGGING_CONFIG_PORT): """ Start up a socket server on the specified port, and listen for new @@ -247,7 +259,12 @@ f = open(file, "w") f.write(chunk) f.close() - fileConfig(file) + try: + fileConfig(file) + except (KeyboardInterrupt, SystemExit): + raise + except: + traceback.print_exc() os.remove(file) except socket.error, e: if type(e.args) != types.TupleType: From python-checkins at python.org Tue Jan 17 05:34:57 2006 From: python-checkins at python.org (barry.warsaw) Date: Tue, 17 Jan 2006 05:34:57 +0100 (CET) Subject: [Python-checkins] r42075 - python/branches/release23-maint/Lib/email/Message.py python/branches/release23-maint/Lib/email/__init__.py python/branches/release23-maint/Lib/email/test/data/msg_41.txt python/branches/release23-maint/Lib/email/test/test_email.py sandbox/trunk/emailpkg/2.5/setup.py Message-ID: <20060117043457.BEFC71E4002@bag.python.org> Author: barry.warsaw Date: Tue Jan 17 05:34:54 2006 New Revision: 42075 Added: python/branches/release23-maint/Lib/email/test/data/msg_41.txt Modified: python/branches/release23-maint/Lib/email/Message.py python/branches/release23-maint/Lib/email/__init__.py python/branches/release23-maint/Lib/email/test/test_email.py sandbox/trunk/emailpkg/2.5/setup.py Log: SF bug #1403349 solution for email 2.5; some MUAs use the 'file' parameter name in the Content-Distribution header, so Message.get_filename() should fall back to using that. Will port both to email 3.0 and Python 2.5 trunk. Also, bump the email package version to 2.5.7 for eventual release. Of course, add a test case too. XXX Need to update the documentation. 
Modified: python/branches/release23-maint/Lib/email/Message.py ============================================================================== --- python/branches/release23-maint/Lib/email/Message.py (original) +++ python/branches/release23-maint/Lib/email/Message.py Tue Jan 17 05:34:54 2006 @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2005 Python Software Foundation +# Copyright (C) 2001-2006 Python Software Foundation # Author: barry at python.org (Barry Warsaw) """Basic message object for the email package object model.""" @@ -718,11 +718,16 @@ """Return the filename associated with the payload if present. The filename is extracted from the Content-Disposition header's - `filename' parameter, and it is unquoted. + `filename' parameter, and it is unquoted. If that header is missing + the `filename' parameter, this method falls back to looking for the + `name' parameter. """ missing = [] filename = self.get_param('filename', missing, 'content-disposition') if filename is missing: + # Some MUAs use a different parameter name + filename = self.get_param('name', missing, 'content-disposition') + if filename is missing: return failobj if isinstance(filename, TupleType): # It's an RFC 2231 encoded parameter Modified: python/branches/release23-maint/Lib/email/__init__.py ============================================================================== --- python/branches/release23-maint/Lib/email/__init__.py (original) +++ python/branches/release23-maint/Lib/email/__init__.py Tue Jan 17 05:34:54 2006 @@ -1,10 +1,9 @@ -# Copyright (C) 2001-2005 Python Software Foundation +# Copyright (C) 2001-2006 Python Software Foundation # Author: barry at python.org (Barry Warsaw) -"""A package for parsing, handling, and generating email messages. 
-""" +"""A package for parsing, handling, and generating email messages.""" -__version__ = '2.5.6' +__version__ = '2.5.7' __all__ = [ 'base64MIME', Added: python/branches/release23-maint/Lib/email/test/data/msg_41.txt ============================================================================== --- (empty file) +++ python/branches/release23-maint/Lib/email/test/data/msg_41.txt Tue Jan 17 05:34:54 2006 @@ -0,0 +1,35 @@ +Return-Path: +Delivered-To: barry at python.org +Received: by mail.python.org (Postfix, from userid 889) + id C2BF0D37C6; Tue, 11 Sep 2001 00:05:05 -0400 (EDT) +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="h90VIIIKmx" +Content-Transfer-Encoding: 7bit +Message-ID: <15261.36209.358846.118674 at anthem.python.org> +From: barry at python.org (Barry A. Warsaw) +To: barry at python.org +Subject: a simple multipart +Date: Tue, 11 Sep 2001 00:05:05 -0400 +X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid +X-Attribution: BAW +X-Oblique-Strategy: Make a door into a window + + +--h90VIIIKmx +Content-Type: text/plain +Content-Disposition: inline; name="msg.txt" +Content-Transfer-Encoding: 7bit + +a simple kind of mirror +to reflect upon our own + +--h90VIIIKmx +Content-Type: text/plain +Content-Disposition: inline; name="msg.txt" +Content-Transfer-Encoding: 7bit + +a simple kind of mirror +to reflect upon our own + +--h90VIIIKmx-- + Modified: python/branches/release23-maint/Lib/email/test/test_email.py ============================================================================== --- python/branches/release23-maint/Lib/email/test/test_email.py (original) +++ python/branches/release23-maint/Lib/email/test/test_email.py Tue Jan 17 05:34:54 2006 @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2005 Python Software Foundation +# Copyright (C) 2001-2006 Python Software Foundation # email package unit tests import os @@ -158,6 +158,13 @@ subpart = msg.get_payload(1) eq(subpart.get_filename(), 'dingusfish.gif') + def 
test_get_filename_with_name_parameter(self): + eq = self.assertEqual + + msg = self._msgobj('msg_41.txt') + filenames = [p.get_filename() for p in msg.get_payload()] + eq(filenames, ['msg.txt', 'msg.txt']) + def test_get_boundary(self): eq = self.assertEqual msg = self._msgobj('msg_07.txt') Modified: sandbox/trunk/emailpkg/2.5/setup.py ============================================================================== --- sandbox/trunk/emailpkg/2.5/setup.py (original) +++ sandbox/trunk/emailpkg/2.5/setup.py Tue Jan 17 05:34:54 2006 @@ -1,6 +1,6 @@ #! /usr/bin/env python # -# Copyright (C) 2001-2005 Python Software Foundation +# Copyright (C) 2001-2006 Python Software Foundation # Standard distutils setup.py install script for the `mimelib' library, a next # generation MIME library for Python. To install into your existing Python From python-checkins at python.org Tue Jan 17 05:49:08 2006 From: python-checkins at python.org (barry.warsaw) Date: Tue, 17 Jan 2006 05:49:08 +0100 (CET) Subject: [Python-checkins] r42076 - in python/trunk/Lib/email: Message.py __init__.py test/data/msg_44.txt test/test_email.py Message-ID: <20060117044908.4F6601E4002@bag.python.org> Author: barry.warsaw Date: Tue Jan 17 05:49:07 2006 New Revision: 42076 Added: python/trunk/Lib/email/test/data/msg_44.txt - copied unchanged from r42075, python/branches/release23-maint/Lib/email/test/data/msg_41.txt Modified: python/trunk/Lib/email/Message.py python/trunk/Lib/email/__init__.py python/trunk/Lib/email/test/test_email.py Log: Ported 42075 from release23-maint branch. SF bug #1403349 solution for email 3.0; some MUAs use the 'file' parameter name in the Content-Distribution header, so Message.get_filename() should fall back to using that. Will port to the Python 2.5 trunk. Also, bump the email package version to 3.0.1 for eventual release. Of course, add a test case too. XXX Need to update the documentation. 
Modified: python/trunk/Lib/email/Message.py ============================================================================== --- python/trunk/Lib/email/Message.py (original) +++ python/trunk/Lib/email/Message.py Tue Jan 17 05:49:07 2006 @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2004 Python Software Foundation +# Copyright (C) 2001-2006 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig at python.org @@ -701,11 +701,15 @@ """Return the filename associated with the payload if present. The filename is extracted from the Content-Disposition header's - `filename' parameter, and it is unquoted. + `filename' parameter, and it is unquoted. If that header is missing + the `filename' parameter, this method falls back to looking for the + `name' parameter. """ missing = object() filename = self.get_param('filename', missing, 'content-disposition') if filename is missing: + filename = self.get_param('name', missing, 'content-disposition') + if filename is missing: return failobj return Utils.collapse_rfc2231_value(filename).strip() Modified: python/trunk/Lib/email/__init__.py ============================================================================== --- python/trunk/Lib/email/__init__.py (original) +++ python/trunk/Lib/email/__init__.py Tue Jan 17 05:49:07 2006 @@ -1,10 +1,10 @@ -# Copyright (C) 2001-2004 Python Software Foundation +# Copyright (C) 2001-2006 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig at python.org """A package for parsing, handling, and generating email messages.""" -__version__ = '3.0+' +__version__ = '3.0.1' __all__ = [ 'base64MIME', Modified: python/trunk/Lib/email/test/test_email.py ============================================================================== --- python/trunk/Lib/email/test/test_email.py (original) +++ python/trunk/Lib/email/test/test_email.py Tue Jan 17 05:49:07 2006 @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2004 Python Software Foundation +# Copyright (C) 2001-2006 Python Software Foundation # 
Contact: email-sig at python.org # email package unit tests @@ -147,6 +147,13 @@ subpart = msg.get_payload(1) eq(subpart.get_filename(), 'dingusfish.gif') + def test_get_filename_with_name_parameter(self): + eq = self.assertEqual + + msg = self._msgobj('msg_44.txt') + filenames = [p.get_filename() for p in msg.get_payload()] + eq(filenames, ['msg.txt', 'msg.txt']) + def test_get_boundary(self): eq = self.assertEqual msg = self._msgobj('msg_07.txt') From python-checkins at python.org Tue Jan 17 06:09:20 2006 From: python-checkins at python.org (barry.warsaw) Date: Tue, 17 Jan 2006 06:09:20 +0100 (CET) Subject: [Python-checkins] r42077 - in python/branches/release24-maint/Lib/email: Message.py __init__.py test/data/msg_44.txt test/test_email.py Message-ID: <20060117050920.E9BC11E4002@bag.python.org> Author: barry.warsaw Date: Tue Jan 17 06:09:19 2006 New Revision: 42077 Added: python/branches/release24-maint/Lib/email/test/data/msg_44.txt - copied unchanged from r42076, python/trunk/Lib/email/test/data/msg_44.txt Modified: python/branches/release24-maint/Lib/email/Message.py python/branches/release24-maint/Lib/email/__init__.py python/branches/release24-maint/Lib/email/test/test_email.py Log: Ported 42076 from the trunk. SF bug #1403349 solution for email 3.0; some MUAs use the 'file' parameter name in the Content-Distribution header, so Message.get_filename() should fall back to using that. Also, bump the email package version to 3.0.1 for eventual release. Of course, add a test case too. XXX Need to update the documentation. 
Modified: python/branches/release24-maint/Lib/email/Message.py ============================================================================== --- python/branches/release24-maint/Lib/email/Message.py (original) +++ python/branches/release24-maint/Lib/email/Message.py Tue Jan 17 06:09:19 2006 @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2004 Python Software Foundation +# Copyright (C) 2001-2006 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig at python.org @@ -701,11 +701,15 @@ """Return the filename associated with the payload if present. The filename is extracted from the Content-Disposition header's - `filename' parameter, and it is unquoted. + `filename' parameter, and it is unquoted. If that header is missing + the `filename' parameter, this method falls back to looking for the + `name' parameter. """ missing = object() filename = self.get_param('filename', missing, 'content-disposition') if filename is missing: + filename = self.get_param('name', missing, 'content-disposition') + if filename is missing: return failobj return Utils.collapse_rfc2231_value(filename).strip() Modified: python/branches/release24-maint/Lib/email/__init__.py ============================================================================== --- python/branches/release24-maint/Lib/email/__init__.py (original) +++ python/branches/release24-maint/Lib/email/__init__.py Tue Jan 17 06:09:19 2006 @@ -1,10 +1,10 @@ -# Copyright (C) 2001-2004 Python Software Foundation +# Copyright (C) 2001-2006 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig at python.org """A package for parsing, handling, and generating email messages.""" -__version__ = '3.0+' +__version__ = '3.0.1' __all__ = [ 'base64MIME', Modified: python/branches/release24-maint/Lib/email/test/test_email.py ============================================================================== --- python/branches/release24-maint/Lib/email/test/test_email.py (original) +++ 
python/branches/release24-maint/Lib/email/test/test_email.py Tue Jan 17 06:09:19 2006 @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2004 Python Software Foundation +# Copyright (C) 2001-2006 Python Software Foundation # Contact: email-sig at python.org # email package unit tests @@ -147,6 +147,13 @@ subpart = msg.get_payload(1) eq(subpart.get_filename(), 'dingusfish.gif') + def test_get_filename_with_name_parameter(self): + eq = self.assertEqual + + msg = self._msgobj('msg_44.txt') + filenames = [p.get_filename() for p in msg.get_payload()] + eq(filenames, ['msg.txt', 'msg.txt']) + def test_get_boundary(self): eq = self.assertEqual msg = self._msgobj('msg_07.txt') From nnorwitz at gmail.com Tue Jan 17 06:12:13 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Mon, 16 Jan 2006 21:12:13 -0800 Subject: [Python-checkins] r42074 - python/trunk/Lib/logging/config.py In-Reply-To: <20060116212838.3EF8D1E4007@bag.python.org> References: <20060116212838.3EF8D1E4007@bag.python.org> Message-ID: What's the point of checking if not len(somelist)? Why not just do if not somelist (ie, drop the len())? There were a bunch of cases of this I noticed in the code below. Search for here. n -- On 1/16/06, vinay.sajip wrote: > Author: vinay.sajip > Date: Mon Jan 16 22:28:37 2006 > New Revision: 42074 > > Modified: > python/trunk/Lib/logging/config.py > Log: > Refactoring for fileConfig. Contributed by Shane Hathaway. > > Modified: python/trunk/Lib/logging/config.py > ============================================================================== > --- python/trunk/Lib/logging/config.py (original) > +++ python/trunk/Lib/logging/config.py Mon Jan 16 22:28:37 2006 > @@ -72,135 +72,147 @@ > cp.readfp(fname) > else: > cp.read(fname) > - #first, do the formatters... 
> - flist = cp.get("formatters", "keys") > - if len(flist): > - flist = string.split(flist, ",") > - formatters = {} > - for form in flist: > - sectname = "formatter_%s" % form > - opts = cp.options(sectname) > - if "format" in opts: > - fs = cp.get(sectname, "format", 1) > - else: > - fs = None > - if "datefmt" in opts: > - dfs = cp.get(sectname, "datefmt", 1) > - else: > - dfs = None > - f = logging.Formatter(fs, dfs) > - formatters[form] = f > - #next, do the handlers... > - #critical section... > + > + formatters = _create_formatters(cp) > + > + # critical section > logging._acquireLock() > try: > - try: > - #first, lose the existing handlers... > - logging._handlers.clear() > - #now set up the new ones... > - hlist = cp.get("handlers", "keys") > - if len(hlist): > - hlist = string.split(hlist, ",") > - handlers = {} > - fixups = [] #for inter-handler references > - for hand in hlist: > - try: > - sectname = "handler_%s" % hand > - klass = cp.get(sectname, "class") > - opts = cp.options(sectname) > - if "formatter" in opts: > - fmt = cp.get(sectname, "formatter") > - else: > - fmt = "" > - klass = eval(klass, vars(logging)) > - args = cp.get(sectname, "args") > - args = eval(args, vars(logging)) > - h = apply(klass, args) > - if "level" in opts: > - level = cp.get(sectname, "level") > - h.setLevel(logging._levelNames[level]) > - if len(fmt): > - h.setFormatter(formatters[fmt]) > - #temporary hack for FileHandler and MemoryHandler. > - if klass == logging.handlers.MemoryHandler: > - if "target" in opts: > - target = cp.get(sectname,"target") > - else: > - target = "" > - if len(target): #the target handler may not be loaded yet, so keep for later... > - fixups.append((h, target)) > - handlers[hand] = h > - except: #if an error occurs when instantiating a handler, too bad > - pass #this could happen e.g. because of lack of privileges > - #now all handlers are loaded, fixup inter-handler references... 
> - for fixup in fixups: > - h = fixup[0] > - t = fixup[1] > - h.setTarget(handlers[t]) > - #at last, the loggers...first the root... > - llist = cp.get("loggers", "keys") > - llist = string.split(llist, ",") > - llist.remove("root") > - sectname = "logger_root" > - root = logging.root > - log = root > - opts = cp.options(sectname) > - if "level" in opts: > - level = cp.get(sectname, "level") > - log.setLevel(logging._levelNames[level]) > - for h in root.handlers[:]: > - root.removeHandler(h) > - hlist = cp.get(sectname, "handlers") > - if len(hlist): > - hlist = string.split(hlist, ",") > - for hand in hlist: > - log.addHandler(handlers[hand]) > - #and now the others... > - #we don't want to lose the existing loggers, > - #since other threads may have pointers to them. > - #existing is set to contain all existing loggers, > - #and as we go through the new configuration we > - #remove any which are configured. At the end, > - #what's left in existing is the set of loggers > - #which were in the previous configuration but > - #which are not in the new configuration. > - existing = root.manager.loggerDict.keys() > - #now set up the new ones... > - for log in llist: > - sectname = "logger_%s" % log > - qn = cp.get(sectname, "qualname") > - opts = cp.options(sectname) > - if "propagate" in opts: > - propagate = cp.getint(sectname, "propagate") > - else: > - propagate = 1 > - logger = logging.getLogger(qn) > - if qn in existing: > - existing.remove(qn) > - if "level" in opts: > - level = cp.get(sectname, "level") > - logger.setLevel(logging._levelNames[level]) > - for h in logger.handlers[:]: > - logger.removeHandler(h) > - logger.propagate = propagate > - logger.disabled = 0 > - hlist = cp.get(sectname, "handlers") > - if len(hlist): > - hlist = string.split(hlist, ",") > - for hand in hlist: > - logger.addHandler(handlers[hand]) > - #Disable any old loggers. 
There's no point deleting > - #them as other threads may continue to hold references > - #and by disabling them, you stop them doing any logging. > - for log in existing: > - root.manager.loggerDict[log].disabled = 1 > - except: > - ei = sys.exc_info() > - traceback.print_exception(ei[0], ei[1], ei[2], None, sys.stderr) > - del ei > + logging._handlers.clear() > + # Handlers add themselves to logging._handlers > + handlers = _install_handlers(cp, formatters) > + _install_loggers(cp, handlers) > finally: > logging._releaseLock() > > + > +def _create_formatters(cp): > + """Create and return formatters""" > + flist = cp.get("formatters", "keys") > + if not len(flist): here > + return {} > + flist = string.split(flist, ",") > + formatters = {} > + for form in flist: > + sectname = "formatter_%s" % form > + opts = cp.options(sectname) > + if "format" in opts: > + fs = cp.get(sectname, "format", 1) > + else: > + fs = None > + if "datefmt" in opts: > + dfs = cp.get(sectname, "datefmt", 1) > + else: > + dfs = None > + f = logging.Formatter(fs, dfs) > + formatters[form] = f > + return formatters > + > + > +def _install_handlers(cp, formatters): > + """Install and return handlers""" > + hlist = cp.get("handlers", "keys") > + if not len(hlist): here > + return {} > + hlist = string.split(hlist, ",") > + handlers = {} > + fixups = [] #for inter-handler references > + for hand in hlist: > + sectname = "handler_%s" % hand > + klass = cp.get(sectname, "class") > + opts = cp.options(sectname) > + if "formatter" in opts: > + fmt = cp.get(sectname, "formatter") > + else: > + fmt = "" > + klass = eval(klass, vars(logging)) > + args = cp.get(sectname, "args") > + args = eval(args, vars(logging)) > + h = apply(klass, args) > + if "level" in opts: > + level = cp.get(sectname, "level") > + h.setLevel(logging._levelNames[level]) > + if len(fmt): here > + h.setFormatter(formatters[fmt]) > + #temporary hack for FileHandler and MemoryHandler. 
> + if klass == logging.handlers.MemoryHandler: > + if "target" in opts: > + target = cp.get(sectname,"target") > + else: > + target = "" > + if len(target): #the target handler may not be loaded yet, so keep for later... here > + fixups.append((h, target)) > + handlers[hand] = h > + #now all handlers are loaded, fixup inter-handler references... > + for h, t in fixups: > + h.setTarget(handlers[t]) > + return handlers > + > + > +def _install_loggers(cp, handlers): > + """Create and install loggers""" > + > + # configure the root first > + llist = cp.get("loggers", "keys") > + llist = string.split(llist, ",") > + llist.remove("root") > + sectname = "logger_root" > + root = logging.root > + log = root > + opts = cp.options(sectname) > + if "level" in opts: > + level = cp.get(sectname, "level") > + log.setLevel(logging._levelNames[level]) > + for h in root.handlers[:]: > + root.removeHandler(h) > + hlist = cp.get(sectname, "handlers") > + if len(hlist): here > + hlist = string.split(hlist, ",") > + for hand in hlist: > + log.addHandler(handlers[hand]) > + > + #and now the others... > + #we don't want to lose the existing loggers, > + #since other threads may have pointers to them. > + #existing is set to contain all existing loggers, > + #and as we go through the new configuration we > + #remove any which are configured. At the end, > + #what's left in existing is the set of loggers > + #which were in the previous configuration but > + #which are not in the new configuration. > + existing = root.manager.loggerDict.keys() > + #now set up the new ones... 
> + for log in llist: > + sectname = "logger_%s" % log > + qn = cp.get(sectname, "qualname") > + opts = cp.options(sectname) > + if "propagate" in opts: > + propagate = cp.getint(sectname, "propagate") > + else: > + propagate = 1 > + logger = logging.getLogger(qn) > + if qn in existing: > + existing.remove(qn) > + if "level" in opts: > + level = cp.get(sectname, "level") > + logger.setLevel(logging._levelNames[level]) > + for h in logger.handlers[:]: > + logger.removeHandler(h) > + logger.propagate = propagate > + logger.disabled = 0 > + hlist = cp.get(sectname, "handlers") > + if len(hlist): here > + hlist = string.split(hlist, ",") > + for hand in hlist: > + logger.addHandler(handlers[hand]) > + > + #Disable any old loggers. There's no point deleting > + #them as other threads may continue to hold references > + #and by disabling them, you stop them doing any logging. > + for log in existing: > + root.manager.loggerDict[log].disabled = 1 > + > + > def listen(port=DEFAULT_LOGGING_CONFIG_PORT): > """ > Start up a socket server on the specified port, and listen for new > @@ -247,7 +259,12 @@ > f = open(file, "w") > f.write(chunk) > f.close() > - fileConfig(file) > + try: > + fileConfig(file) > + except (KeyboardInterrupt, SystemExit): > + raise > + except: > + traceback.print_exc() > os.remove(file) > except socket.error, e: > if type(e.args) != types.TupleType: > _______________________________________________ > Python-checkins mailing list > Python-checkins at python.org > http://mail.python.org/mailman/listinfo/python-checkins > From nnorwitz at gmail.com Tue Jan 17 06:14:25 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Mon, 16 Jan 2006 21:14:25 -0800 Subject: [Python-checkins] r42072 - python/trunk/Lib/test/test_logging.py In-Reply-To: <20060116212442.2D8291E4002@bag.python.org> References: <20060116212442.2D8291E4002@bag.python.org> Message-ID: I'm not sure if it was this checkin, but one of the logging checkins appears to have broken the test run. 
I'm not sure if it was in between checkins or if the test is unstable or what. http://www.python.org/dev/buildbot/sparc%20solaris10%20gcc%20trunk/builds/116/step-test/0 n -- On 1/16/06, vinay.sajip wrote: > Author: vinay.sajip > Date: Mon Jan 16 22:24:38 2006 > New Revision: 42072 > > Modified: > python/trunk/Lib/test/test_logging.py > Log: > Added test for fileConfig. Contributed by Shane Hathaway. > > Modified: python/trunk/Lib/test/test_logging.py > ============================================================================== > --- python/trunk/Lib/test/test_logging.py (original) > +++ python/trunk/Lib/test/test_logging.py Mon Jan 16 22:24:38 2006 > @@ -26,7 +26,7 @@ > > import select > import os, sys, string, struct, types, cPickle, cStringIO > -import socket, threading, time > +import socket, tempfile, threading, time > import logging, logging.handlers, logging.config > > BANNER = "-- %-10s %-6s ---------------------------------------------------\n" > @@ -393,6 +393,102 @@ > hand.removeFilter(filt) > > #---------------------------------------------------------------------------- > +# Test 4 > +#---------------------------------------------------------------------------- > + > +# config0 is a standard configuratin. > +config0 = """ > +[loggers] > +keys=root > + > +[handlers] > +keys=hand1 > + > +[formatters] > +keys=form1 > + > +[logger_root] > +level=NOTSET > +handlers=hand1 > + > +[handler_hand1] > +class=StreamHandler > +level=NOTSET > +formatter=form1 > +args=(sys.stdout,) > + > +[formatter_form1] > +format=%(levelname)s:%(name)s:%(message)s > +datefmt= > +""" > + > +# config1 adds a little to the standard configuration. 
> +config1 = """ > +[loggers] > +keys=root,parser > + > +[handlers] > +keys=hand1 > + > +[formatters] > +keys=form1 > + > +[logger_root] > +level=NOTSET > +handlers=hand1 > + > +[logger_parser] > +level=DEBUG > +handlers=hand1 > +propagate=1 > +qualname=compiler.parser > + > +[handler_hand1] > +class=StreamHandler > +level=NOTSET > +formatter=form1 > +args=(sys.stdout,) > + > +[formatter_form1] > +format=%(levelname)s:%(name)s:%(message)s > +datefmt= > +""" > + > +# config2 has a subtle configuration error that should be reported > +config2 = string.replace(config1, "sys.stdout", "sys.stbout") > + > +# config3 has a less subtle configuration error > +config3 = string.replace( > + config1, "formatter=form1", "formatter=misspelled_name") > + > +def test4(): > + for i in range(4): > + conf = globals()['config%d' % i] > + sys.stdout.write('config%d: ' % i) > + loggerDict = logging.getLogger().manager.loggerDict > + saved_handlers = logging._handlers.copy() > + saved_loggers = loggerDict.copy() > + try: > + fn = tempfile.mktemp(".ini") > + f = open(fn, "w") > + f.write(conf) > + f.close() > + try: > + logging.config.fileConfig(fn) > + except: > + t = sys.exc_info()[0] > + message(str(t)) > + else: > + message('ok.') > + os.remove(fn) > + finally: > + logging._handlers.clear() > + logging._handlers.update(saved_handlers) > + loggerDict = logging.getLogger().manager.loggerDict > + loggerDict.clear() > + loggerDict.update(saved_loggers) > + > +#---------------------------------------------------------------------------- > # Test Harness > #---------------------------------------------------------------------------- > def banner(nm, typ): > @@ -456,6 +552,10 @@ > test3() > banner("log_test3", "end") > > + banner("log_test4", "begin") > + test4() > + banner("log_test4", "end") > + > finally: > #wait for TCP receiver to terminate > socketDataProcessed.wait() > _______________________________________________ > Python-checkins mailing list > Python-checkins at python.org > 
http://mail.python.org/mailman/listinfo/python-checkins > From python-checkins at python.org Tue Jan 17 06:17:30 2006 From: python-checkins at python.org (barry.warsaw) Date: Tue, 17 Jan 2006 06:17:30 +0100 (CET) Subject: [Python-checkins] r42078 - python/branches/release23-maint/Doc/lib/emailmessage.tex Message-ID: <20060117051730.AC9751E4002@bag.python.org> Author: barry.warsaw Date: Tue Jan 17 06:17:29 2006 New Revision: 42078 Modified: python/branches/release23-maint/Doc/lib/emailmessage.tex Log: Updated docs for email.Message.get_filename() Modified: python/branches/release23-maint/Doc/lib/emailmessage.tex ============================================================================== --- python/branches/release23-maint/Doc/lib/emailmessage.tex (original) +++ python/branches/release23-maint/Doc/lib/emailmessage.tex Tue Jan 17 06:17:29 2006 @@ -431,10 +431,11 @@ \begin{methoddesc}[Message]{get_filename}{\optional{failobj}} Return the value of the \code{filename} parameter of the -\mailheader{Content-Disposition} header of the message, or \var{failobj} if -either the header is missing, or has no \code{filename} parameter. -The returned string will always be unquoted as per -\method{Utils.unquote()}. +\mailheader{Content-Disposition} header of the message. If the header does +not have a \code{filename} parameter, this method falls back to looking for +the \code{name} parameter. If neither is found, or the header is missing, +then \var{failobj} is returned. The returned string will always be unquoted +as per \method{Utils.unquote()}. 
\end{methoddesc} \begin{methoddesc}[Message]{get_boundary}{\optional{failobj}} From python-checkins at python.org Tue Jan 17 06:24:25 2006 From: python-checkins at python.org (barry.warsaw) Date: Tue, 17 Jan 2006 06:24:25 +0100 (CET) Subject: [Python-checkins] r42079 - python/trunk/Doc/lib/emailmessage.tex Message-ID: <20060117052425.6EB2D1E4002@bag.python.org> Author: barry.warsaw Date: Tue Jan 17 06:24:25 2006 New Revision: 42079 Modified: python/trunk/Doc/lib/emailmessage.tex Log: Update email.Message.get_filename() documentation. Modified: python/trunk/Doc/lib/emailmessage.tex ============================================================================== --- python/trunk/Doc/lib/emailmessage.tex (original) +++ python/trunk/Doc/lib/emailmessage.tex Tue Jan 17 06:24:25 2006 @@ -435,10 +435,11 @@ \begin{methoddesc}[Message]{get_filename}{\optional{failobj}} Return the value of the \code{filename} parameter of the -\mailheader{Content-Disposition} header of the message, or \var{failobj} if -either the header is missing, or has no \code{filename} parameter. -The returned string will always be unquoted as per -\method{Utils.unquote()}. +\mailheader{Content-Disposition} header of the message. If the header does +not have a \code{filename} parameter, this method falls back to looking for +the \code{name} parameter. If neither is found, or the header is missing, +then \var{failobj} is returned. The returned string will always be unquoted +as per \method{Utils.unquote()}. 
\end{methoddesc} \begin{methoddesc}[Message]{get_boundary}{\optional{failobj}} From python-checkins at python.org Tue Jan 17 06:27:42 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 17 Jan 2006 06:27:42 +0100 (CET) Subject: [Python-checkins] r42080 - python/trunk/Modules/bz2module.c Message-ID: <20060117052742.EF9881E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 17 06:27:39 2006 New Revision: 42080 Modified: python/trunk/Modules/bz2module.c Log: SF Bug #1407069, Remove extra semi-colon if there is no long long Modified: python/trunk/Modules/bz2module.c ============================================================================== --- python/trunk/Modules/bz2module.c (original) +++ python/trunk/Modules/bz2module.c Tue Jan 17 06:27:39 2006 @@ -54,7 +54,7 @@ (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32) #else #define BZS_TOTAL_OUT(bzs) \ - bzs->total_out_lo32; + bzs->total_out_lo32 #endif #else /* ! BZ_CONFIG_ERROR */ From python-checkins at python.org Tue Jan 17 06:28:08 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 17 Jan 2006 06:28:08 +0100 (CET) Subject: [Python-checkins] r42081 - python/branches/release24-maint/Modules/bz2module.c Message-ID: <20060117052808.7E5AB1E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 17 06:28:07 2006 New Revision: 42081 Modified: python/branches/release24-maint/Modules/bz2module.c Log: SF Bug #1407069, Remove extra semi-colon if there is no long long Modified: python/branches/release24-maint/Modules/bz2module.c ============================================================================== --- python/branches/release24-maint/Modules/bz2module.c (original) +++ python/branches/release24-maint/Modules/bz2module.c Tue Jan 17 06:28:07 2006 @@ -54,7 +54,7 @@ (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32) #else #define BZS_TOTAL_OUT(bzs) \ - bzs->total_out_lo32; + bzs->total_out_lo32 #endif #else /* ! 
BZ_CONFIG_ERROR */ From python-checkins at python.org Tue Jan 17 06:29:56 2006 From: python-checkins at python.org (barry.warsaw) Date: Tue, 17 Jan 2006 06:29:56 +0100 (CET) Subject: [Python-checkins] r42082 - python/branches/release24-maint/Doc/lib/emailmessage.tex Message-ID: <20060117052956.BBB981E4002@bag.python.org> Author: barry.warsaw Date: Tue Jan 17 06:29:56 2006 New Revision: 42082 Modified: python/branches/release24-maint/Doc/lib/emailmessage.tex Log: Update documentation for email.Message.get_filename(). Modified: python/branches/release24-maint/Doc/lib/emailmessage.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/emailmessage.tex (original) +++ python/branches/release24-maint/Doc/lib/emailmessage.tex Tue Jan 17 06:29:56 2006 @@ -435,10 +435,11 @@ \begin{methoddesc}[Message]{get_filename}{\optional{failobj}} Return the value of the \code{filename} parameter of the -\mailheader{Content-Disposition} header of the message, or \var{failobj} if -either the header is missing, or has no \code{filename} parameter. -The returned string will always be unquoted as per -\method{Utils.unquote()}. +\mailheader{Content-Disposition} header of the message. If the header does +not have a \code{filename} parameter, this method falls back to looking for +the \code{name} parameter. If neither is found, or the header is missing, +then \var{failobj} is returned. The returned string will always be unquoted +as per \method{Utils.unquote()}. 
\end{methoddesc} \begin{methoddesc}[Message]{get_boundary}{\optional{failobj}} From python-checkins at python.org Tue Jan 17 06:58:09 2006 From: python-checkins at python.org (barry.warsaw) Date: Tue, 17 Jan 2006 06:58:09 +0100 (CET) Subject: [Python-checkins] r42083 - in python/trunk/Lib/email: FeedParser.py test/test_email.py Message-ID: <20060117055809.0B2FC1E4002@bag.python.org> Author: barry.warsaw Date: Tue Jan 17 06:58:08 2006 New Revision: 42083 Modified: python/trunk/Lib/email/FeedParser.py python/trunk/Lib/email/test/test_email.py Log: SF bug #1347874; FeedParser does not comply with RFC2822. Change headerRE as suggested in the bug report, so that single character headers are accepted. Test case added too. Will backport to Python 2.4. Modified: python/trunk/Lib/email/FeedParser.py ============================================================================== --- python/trunk/Lib/email/FeedParser.py (original) +++ python/trunk/Lib/email/FeedParser.py Tue Jan 17 06:58:08 2006 @@ -1,4 +1,4 @@ -# Copyright (C) 2004 Python Software Foundation +# Copyright (C) 2004-2006 Python Software Foundation # Authors: Baxter, Wouters and Warsaw # Contact: email-sig at python.org @@ -29,7 +29,7 @@ NLCRE_crack = re.compile('(\r\n|\r|\n)') # RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character # except controls, SP, and ":". 
-headerRE = re.compile(r'^(From |[\041-\071\073-\176]{2,}:|[\t ])') +headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])') EMPTYSTRING = '' NL = '\n' Modified: python/trunk/Lib/email/test/test_email.py ============================================================================== --- python/trunk/Lib/email/test/test_email.py (original) +++ python/trunk/Lib/email/test/test_email.py Tue Jan 17 06:58:08 2006 @@ -2467,6 +2467,15 @@ msg = email.message_from_string(m) eq(len(msg.keys()), 0) + def test_rfc2822_one_character_header(self): + eq = self.assertEqual + m = 'A: first header\nB: second header\nCC: third header\n\nbody' + msg = email.message_from_string(m) + headers = msg.keys() + headers.sort() + eq(headers, ['A', 'B', 'CC']) + eq(msg.get_payload(), 'body') + class TestBase64(unittest.TestCase): From python-checkins at python.org Tue Jan 17 06:59:20 2006 From: python-checkins at python.org (barry.warsaw) Date: Tue, 17 Jan 2006 06:59:20 +0100 (CET) Subject: [Python-checkins] r42084 - in python/branches/release24-maint/Lib/email: FeedParser.py test/test_email.py Message-ID: <20060117055920.A7D5A1E4002@bag.python.org> Author: barry.warsaw Date: Tue Jan 17 06:59:18 2006 New Revision: 42084 Modified: python/branches/release24-maint/Lib/email/FeedParser.py python/branches/release24-maint/Lib/email/test/test_email.py Log: SF bug #1347874; FeedParser does not comply with RFC2822. Change headerRE as suggested in the bug report, so that single character headers are accepted. Test case added too. Backported from the trunk. 
Modified: python/branches/release24-maint/Lib/email/FeedParser.py ============================================================================== --- python/branches/release24-maint/Lib/email/FeedParser.py (original) +++ python/branches/release24-maint/Lib/email/FeedParser.py Tue Jan 17 06:59:18 2006 @@ -1,4 +1,4 @@ -# Copyright (C) 2004 Python Software Foundation +# Copyright (C) 2004-2006 Python Software Foundation # Authors: Baxter, Wouters and Warsaw # Contact: email-sig at python.org @@ -29,7 +29,7 @@ NLCRE_crack = re.compile('(\r\n|\r|\n)') # RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character # except controls, SP, and ":". -headerRE = re.compile(r'^(From |[\041-\071\073-\176]{2,}:|[\t ])') +headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])') EMPTYSTRING = '' NL = '\n' Modified: python/branches/release24-maint/Lib/email/test/test_email.py ============================================================================== --- python/branches/release24-maint/Lib/email/test/test_email.py (original) +++ python/branches/release24-maint/Lib/email/test/test_email.py Tue Jan 17 06:59:18 2006 @@ -2467,6 +2467,15 @@ msg = email.message_from_string(m) eq(len(msg.keys()), 0) + def test_rfc2822_one_character_header(self): + eq = self.assertEqual + m = 'A: first header\nB: second header\nCC: third header\n\nbody' + msg = email.message_from_string(m) + headers = msg.keys() + headers.sort() + eq(headers, ['A', 'B', 'CC']) + eq(msg.get_payload(), 'body') + class TestBase64(unittest.TestCase): From python-checkins at python.org Tue Jan 17 07:55:52 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 17 Jan 2006 07:55:52 +0100 (CET) Subject: [Python-checkins] r42085 - python/branches/ssize_t/Python/modsupport.c Message-ID: <20060117065552.422211E4010@bag.python.org> Author: neal.norwitz Date: Tue Jan 17 07:55:51 2006 New Revision: 42085 Modified: python/branches/ssize_t/Python/modsupport.c Log: AFAIK, chars are passed as ints on 
64-bit arches too. Since we are formatting a char, this should not lose any bits. Modified: python/branches/ssize_t/Python/modsupport.c ============================================================================== --- python/branches/ssize_t/Python/modsupport.c (original) +++ python/branches/ssize_t/Python/modsupport.c Tue Jan 17 07:55:51 2006 @@ -376,7 +376,7 @@ case 'c': { char p[1]; - p[0] = va_arg(*p_va, int); + p[0] = (char)va_arg(*p_va, int); return PyString_FromStringAndSize(p, 1); } From skip at pobox.com Tue Jan 17 16:24:34 2006 From: skip at pobox.com (skip at pobox.com) Date: Tue, 17 Jan 2006 09:24:34 -0600 Subject: [Python-checkins] r42072 - python/trunk/Lib/test/test_logging.py In-Reply-To: References: <20060116212442.2D8291E4002@bag.python.org> Message-ID: <17357.3122.173952.210383@montanaro.dyndns.org> Neal> I'm not sure if it was this checkin, but one of the logging Neal> checkins appears to have broken the test run. The g5 failed at least in part because my music- and video-loving son filled the disk so test_largefile failed. I repaired that a bit yesterday... Skip From python-checkins at python.org Tue Jan 17 19:44:46 2006 From: python-checkins at python.org (phillip.eby) Date: Tue, 17 Jan 2006 19:44:46 +0100 (CET) Subject: [Python-checkins] r42086 - sandbox/trunk/setuptools/setuptools/package_index.py Message-ID: <20060117184446.E1D9E1E4002@bag.python.org> Author: phillip.eby Date: Tue Jan 17 19:44:46 2006 New Revision: 42086 Modified: sandbox/trunk/setuptools/setuptools/package_index.py Log: More sourceforge changes. 
:( Modified: sandbox/trunk/setuptools/setuptools/package_index.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/package_index.py (original) +++ sandbox/trunk/setuptools/setuptools/package_index.py Tue Jan 17 19:44:46 2006 @@ -564,7 +564,7 @@ f = self.open_url(url) match = re.search( - r' Author: phillip.eby Date: Tue Jan 17 19:47:56 2006 New Revision: 42087 Modified: sandbox/trunk/setuptools/setuptools/package_index.py Log: Fix editing error (reported by Ian Bicking). Modified: sandbox/trunk/setuptools/setuptools/package_index.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/package_index.py (original) +++ sandbox/trunk/setuptools/setuptools/package_index.py Tue Jan 17 19:47:56 2006 @@ -250,7 +250,7 @@ "scan is required.", url ) - def scan_all(self, msg, *args): + def scan_all(self, msg=None, *args): if self.index_url not in self.fetched_urls: if msg: self.warn(msg,*args) self.warn( From python-checkins at python.org Tue Jan 17 20:57:00 2006 From: python-checkins at python.org (phillip.eby) Date: Tue, 17 Jan 2006 20:57:00 +0100 (CET) Subject: [Python-checkins] r42088 - sandbox/trunk/setuptools/setuptools/package_index.py Message-ID: <20060117195700.872261E4002@bag.python.org> Author: phillip.eby Date: Tue Jan 17 20:56:59 2006 New Revision: 42088 Modified: sandbox/trunk/setuptools/setuptools/package_index.py Log: Scrape-proof Sourceforge mirror processing! 
Modified: sandbox/trunk/setuptools/setuptools/package_index.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/package_index.py (original) +++ sandbox/trunk/setuptools/setuptools/package_index.py Tue Jan 17 20:56:59 2006 @@ -134,9 +134,9 @@ def process_url(self, url, retrieve=False): """Evaluate a URL as a possible download, and maybe retrieve it""" + url = fix_sf_url(url) if url in self.scanned_urls and not retrieve: return - self.scanned_urls[url] = True if not URL_SCHEME(url): # process filenames or directories @@ -296,6 +296,36 @@ "; possible download problem?" ) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + def download(self, spec, tmpdir): """Locate and/or download `spec` to `tmpdir`, returning a local path @@ -502,8 +532,6 @@ def _download_html(self, url, headers, filename, tmpdir): - # Check for a sourceforge URL - sf_url = url.startswith('http://prdownloads.') file = open(filename) for line in file: if line.strip(): @@ -513,13 +541,6 @@ file.close() os.unlink(filename) return self._download_svn(url, filename) - # Check for a SourceForge header - elif sf_url: - page = ''.join(list(file)) - if '?use_mirror=' in page: - file.close() - os.unlink(filename) - return self._download_sourceforge(url, page, tmpdir) break # not an index page file.close() os.unlink(filename) @@ -541,43 +562,42 @@ log.warn(msg, *args) +def fix_sf_url(url): + scheme, server, path, param, query, frag = urlparse.urlparse(url) + if server!='prdownloads.sourceforge.net': + return url + return urlparse.urlunparse( + (scheme, 'dl.sourceforge.net', 'sourceforge'+path, param, '', frag) + ) + + + + + + + + + + + + + + + + + + + + + + - def _download_sourceforge(self, source_url, sf_page, tmpdir): - """Download package from randomly-selected SourceForge mirror""" - self.debug("Processing SourceForge mirror page") - mirror_regex = re.compile(r'HREF="?(/.*?\?use_mirror=[^">]*)', re.I) - urls = [m.group(1) for m in 
mirror_regex.finditer(sf_page)] - if not urls: - raise DistutilsError( - "URL looks like a Sourceforge mirror page, but no URLs found" - ) - import random - url = urlparse.urljoin(source_url, random.choice(urls)) - self.info( - "Requesting redirect to (randomly selected) %r mirror", - url.split('=',1)[-1] - ) - f = self.open_url(url) - match = re.search( - r'(?i) Author: fredrik.lundh Date: Tue Jan 17 22:31:31 2006 New Revision: 42089 Modified: python/trunk/Doc/lib/libasyncore.tex Log: fixed example: adding missing import, handle_close, test code, etc. Modified: python/trunk/Doc/lib/libasyncore.tex ============================================================================== --- python/trunk/Doc/lib/libasyncore.tex (original) +++ python/trunk/Doc/lib/libasyncore.tex Tue Jan 17 22:31:31 2006 @@ -222,29 +222,37 @@ \subsection{asyncore Example basic HTTP client \label{asyncore-example}} -As a basic example, below is a very basic HTTP client that uses the -\class{dispatcher} class to implement its socket handling: +Here is a very basic HTTP client that uses the \class{dispatcher} +class to implement its socket handling: \begin{verbatim} +import asyncore, socket + class http_client(asyncore.dispatcher): - def __init__(self, host,path): + + def __init__(self, host, path): asyncore.dispatcher.__init__(self) - self.path = path self.create_socket(socket.AF_INET, socket.SOCK_STREAM) self.connect( (host, 80) ) - self.buffer = 'GET %s HTTP/1.0\r\n\r\n' % self.path - + self.buffer = 'GET %s HTTP/1.0\r\n\r\n' % path + def handle_connect(self): pass - + + def handle_close(self): + self.close() + def handle_read(self): - data = self.recv(8192) - print data - + print self.recv(8192) + def writable(self): return (len(self.buffer) > 0) - + def handle_write(self): sent = self.send(self.buffer) self.buffer = self.buffer[sent:] + +c = http_client('www.python.org', '/') + +asyncore.loop() \end{verbatim} From python-checkins at python.org Wed Jan 18 10:13:54 2006 From: python-checkins at 
python.org (martin.v.loewis) Date: Wed, 18 Jan 2006 10:13:54 +0100 (CET) Subject: [Python-checkins] r42090 - in python/trunk: Modules/getbuildinfo.c PCbuild/make_buildinfo.vcproj PCbuild/pcbuild.sln PCbuild/pythoncore.vcproj Message-ID: <20060118091354.A31B91E4002@bag.python.org> Author: martin.v.loewis Date: Wed Jan 18 10:13:51 2006 New Revision: 42090 Added: python/trunk/PCbuild/make_buildinfo.vcproj Modified: python/trunk/Modules/getbuildinfo.c python/trunk/PCbuild/pcbuild.sln python/trunk/PCbuild/pythoncore.vcproj Log: Generate getbuildinfo.o each time the linker is invoked; try to generate SVNVERSION information if subwcrev.exe can be found. Modified: python/trunk/Modules/getbuildinfo.c ============================================================================== --- python/trunk/Modules/getbuildinfo.c (original) +++ python/trunk/Modules/getbuildinfo.c Wed Jan 18 10:13:51 2006 @@ -20,6 +20,10 @@ #endif #endif +#ifdef SUBWCREV +#define SVNVERSION "$WCRANGE$$WCMODS?M:$" +#endif + const char * Py_GetBuildInfo(void) { Added: python/trunk/PCbuild/make_buildinfo.vcproj ============================================================================== --- (empty file) +++ python/trunk/PCbuild/make_buildinfo.vcproj Wed Jan 18 10:13:51 2006 @@ -0,0 +1,122 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Modified: python/trunk/PCbuild/pcbuild.sln ============================================================================== Binary files. No diff available. 
Modified: python/trunk/PCbuild/pythoncore.vcproj ============================================================================== --- python/trunk/PCbuild/pythoncore.vcproj (original) +++ python/trunk/PCbuild/pythoncore.vcproj Wed Jan 18 10:13:51 2006 @@ -4,6 +4,7 @@ Version="7.10" Name="pythoncore" ProjectGUID="{7AFA1F0B-A8A1-455A-A832-BF263404BBEF}" + RootNamespace="pythoncore" SccProjectName="pythoncore" SccLocalPath=".."> @@ -37,6 +38,7 @@ Name="VCCustomBuildTool"/> + Name="VCPreLinkEventTool" + Description="generate buildinfo" + CommandLine="make_buildinfo.exe ${ConfigurationName}"/> + Name="VCPreLinkEventTool" + Description="generate buildinfo" + CommandLine="make_buildinfo.exe $(ConfigurationName)"/> + Name="VCPreLinkEventTool" + Description="generate buildinfo" + CommandLine="make_buildinfo.exe ${ConfigurationName}"/> - - - - - Author: martin.v.loewis Date: Wed Jan 18 20:18:51 2006 New Revision: 42091 Added: python/trunk/PCbuild/make_buildinfo.c Log: Add missing make_buildinfo.c. Added: python/trunk/PCbuild/make_buildinfo.c ============================================================================== --- (empty file) +++ python/trunk/PCbuild/make_buildinfo.c Wed Jan 18 20:18:51 2006 @@ -0,0 +1,87 @@ +#include <windows.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <stdio.h> + +/* This file creates the getbuildinfo.o object, by first + invoking subwcrev.exe (if found), and then invoking cl.exe. + As a side effect, it might generate PCBuild\getbuildinfo2.c + also. If this isn't a subversion checkout, or subwcrev isn't + found, it compiles ..\\Modules\\getbuildinfo.c instead. + + Currently, subwcrev.exe is found from the registry entries + of TortoiseSVN. + + No attempt is made to place getbuildinfo.o into the proper + binary directory. This isn't necessary, as this tool is + invoked as a pre-link step for pythoncore, so that overwrites + any previous getbuildinfo.o. 
+ +*/ + +int make_buildinfo2() +{ + struct _stat st; + HKEY hTortoise; + char command[500]; + DWORD type, size; + if (_stat(".svn", &st) < 0) + return 0; + if (RegOpenKey(HKEY_LOCAL_MACHINE, "Software\\TortoiseSVN", &hTortoise) != ERROR_SUCCESS && + RegOpenKey(HKEY_CURRENT_USER, "Software\\TortoiseSVN", &hTortoise) != ERROR_SUCCESS) + /* Tortoise not installed */ + return 0; + size = sizeof(command); + if (RegQueryValueEx(hTortoise, "Directory", 0, &type, command, &size) != ERROR_SUCCESS || + type != REG_SZ) + /* Registry corrupted */ + return 0; + strcat(command, "bin\\subwcrev.exe"); + if (_stat(command, &st) < 0) + /* subwcrev.exe not part of the release */ + return 0; + strcat(command, " .. ..\\Modules\\getbuildinfo.c getbuildinfo2.c"); + puts(command); fflush(stdout); + if (system(command) < 0) + return 0; + return 1; +} + +int main(int argc, char*argv[]) +{ + char command[500] = "cl.exe -c -D_WIN32 -DUSE_DL_EXPORT -D_WINDOWS -DWIN32 -D_WINDLL "; + int do_unlink, result; + if (argc != 2) { + fprintf(stderr, "make_buildinfo $(ConfigurationName)\n"); + return EXIT_FAILURE; + } + if (strcmp(argv[1], "Release") == 0) { + strcat(command, "-MD "); + //strcpy(targetdir, "x86-temp-debug"); + } + else if (strcmp(argv[1], "Debug") == 0) { + strcat(command, "-D_DEBUG -MDd "); + //strcpy(targetdir, "x86-temp-release"); + } + else if (strcmp(argv[1], "ReleaseItanium")) { + strcat(command, "-MD "); + //strcpy(targetdir, "ia64-temp-release"); + } + else { + fprintf(stderr, "unsupported configuration %s\n", argv[1]); + return EXIT_FAILURE; + } + + if ((do_unlink = make_buildinfo2())) + strcat(command, "getbuildinfo2.c -DSUBWCREV "); + else + strcat(command, "..\\Modules\\getbuildinfo.c"); + strcat(command, " -Fogetbuildinfo.o -I..\\Include -I..\\PC"); + puts(command); fflush(stdout); + result = system(command); + if (do_unlink) + unlink("getbuildinfo2.c"); + if (result < 0) + return EXIT_FAILURE; + return 0; +} \ No newline at end of file From python-checkins at python.org 
Wed Jan 18 21:04:04 2006 From: python-checkins at python.org (tim.peters) Date: Wed, 18 Jan 2006 21:04:04 +0100 (CET) Subject: [Python-checkins] r42092 - python/trunk/PCbuild/make_buildinfo.c Message-ID: <20060118200404.8B38B1E4002@bag.python.org> Author: tim.peters Date: Wed Jan 18 21:04:02 2006 New Revision: 42092 Modified: python/trunk/PCbuild/make_buildinfo.c Log: Quote the path to the executable before invoking system(). Modified: python/trunk/PCbuild/make_buildinfo.c ============================================================================== --- python/trunk/PCbuild/make_buildinfo.c (original) +++ python/trunk/PCbuild/make_buildinfo.c Wed Jan 18 21:04:02 2006 @@ -31,12 +31,13 @@ RegOpenKey(HKEY_CURRENT_USER, "Software\\TortoiseSVN", &hTortoise) != ERROR_SUCCESS) /* Tortoise not installed */ return 0; - size = sizeof(command); - if (RegQueryValueEx(hTortoise, "Directory", 0, &type, command, &size) != ERROR_SUCCESS || + command[0] = '"'; /* quote the path to the executable */ + size = sizeof(command) - 1; + if (RegQueryValueEx(hTortoise, "Directory", 0, &type, command+1, &size) != ERROR_SUCCESS || type != REG_SZ) /* Registry corrupted */ return 0; - strcat(command, "bin\\subwcrev.exe"); + strcat(command, "bin\\subwcrev.exe\""); if (_stat(command, &st) < 0) /* subwcrev.exe not part of the release */ return 0; From python-checkins at python.org Thu Jan 19 07:10:11 2006 From: python-checkins at python.org (neal.norwitz) Date: Thu, 19 Jan 2006 07:10:11 +0100 (CET) Subject: [Python-checkins] r42093 - in python/trunk: Modules/_bsddb.c Modules/_curses_panel.c Modules/_cursesmodule.c Modules/_elementtree.c Modules/_heapqmodule.c Modules/_localemodule.c Modules/_randommodule.c Modules/_sre.c Modules/_ssl.c Modules/_testcapimodule.c Modules/_tkinter.c Modules/almodule.c Modules/arraymodule.c Modules/audioop.c Modules/binascii.c Modules/bsddbmodule.c Modules/bz2module.c Modules/cPickle.c Modules/cStringIO.c Modules/cdmodule.c Modules/clmodule.c Modules/cmathmodule.c 
Modules/collectionsmodule.c Modules/datetimemodule.c Modules/dbmmodule.c Modules/dlmodule.c Modules/errnomodule.c Modules/fcntlmodule.c Modules/flmodule.c Modules/fmmodule.c Modules/fpectlmodule.c Modules/fpetestmodule.c Modules/functionalmodule.c Modules/gcmodule.c Modules/gdbmmodule.c Modules/grpmodule.c Modules/imageop.c Modules/imgfile.c Modules/itertoolsmodule.c Modules/linuxaudiodev.c Modules/mathmodule.c Modules/md5module.c Modules/mmapmodule.c Modules/nismodule.c Modules/operator.c Modules/ossaudiodev.c Modules/parsermodule.c Modules/posixmodule.c Modules/puremodule.c Modules/pwdmodule.c Modules/pyexpat.c Modules/readline.c Modules/regexmodule.c Modules/resource.c Modules/rgbimgmodule.c Modules/selectmodule.c Modules/sha256module.c Modules/sha512module.c Modules/shamodule.c Modules/signalmodule.c Modules/socketmodule.c Modules/spwdmodule.c Modules/stropmodule.c Modules/structmodule.c Modules/sunaudiodev.c Modules/svmodule.c Modules/symtablemodule.c Modules/syslogmodule.c Modules/termios.c Modules/threadmodule.c Modules/timemodule.c Modules/xxmodule.c Modules/zlibmodule.c PC/_subprocess.c PC/_winreg.c PC/msvcrtmodule.c PC/winsound.c Python/import.c Python/marshal.c Python/sysmodule.c Message-ID: <20060119061011.17A041E4002@bag.python.org> Author: neal.norwitz Date: Thu Jan 19 07:09:39 2006 New Revision: 42093 Modified: python/trunk/Modules/_bsddb.c python/trunk/Modules/_curses_panel.c python/trunk/Modules/_cursesmodule.c python/trunk/Modules/_elementtree.c python/trunk/Modules/_heapqmodule.c python/trunk/Modules/_localemodule.c python/trunk/Modules/_randommodule.c python/trunk/Modules/_sre.c python/trunk/Modules/_ssl.c python/trunk/Modules/_testcapimodule.c python/trunk/Modules/_tkinter.c python/trunk/Modules/almodule.c python/trunk/Modules/arraymodule.c python/trunk/Modules/audioop.c python/trunk/Modules/binascii.c python/trunk/Modules/bsddbmodule.c python/trunk/Modules/bz2module.c python/trunk/Modules/cPickle.c python/trunk/Modules/cStringIO.c 
python/trunk/Modules/cdmodule.c python/trunk/Modules/clmodule.c python/trunk/Modules/cmathmodule.c python/trunk/Modules/collectionsmodule.c python/trunk/Modules/datetimemodule.c python/trunk/Modules/dbmmodule.c python/trunk/Modules/dlmodule.c python/trunk/Modules/errnomodule.c python/trunk/Modules/fcntlmodule.c python/trunk/Modules/flmodule.c python/trunk/Modules/fmmodule.c python/trunk/Modules/fpectlmodule.c python/trunk/Modules/fpetestmodule.c python/trunk/Modules/functionalmodule.c python/trunk/Modules/gcmodule.c python/trunk/Modules/gdbmmodule.c python/trunk/Modules/grpmodule.c python/trunk/Modules/imageop.c python/trunk/Modules/imgfile.c python/trunk/Modules/itertoolsmodule.c python/trunk/Modules/linuxaudiodev.c python/trunk/Modules/mathmodule.c python/trunk/Modules/md5module.c python/trunk/Modules/mmapmodule.c python/trunk/Modules/nismodule.c python/trunk/Modules/operator.c python/trunk/Modules/ossaudiodev.c python/trunk/Modules/parsermodule.c python/trunk/Modules/posixmodule.c python/trunk/Modules/puremodule.c python/trunk/Modules/pwdmodule.c python/trunk/Modules/pyexpat.c python/trunk/Modules/readline.c python/trunk/Modules/regexmodule.c python/trunk/Modules/resource.c python/trunk/Modules/rgbimgmodule.c python/trunk/Modules/selectmodule.c python/trunk/Modules/sha256module.c python/trunk/Modules/sha512module.c python/trunk/Modules/shamodule.c python/trunk/Modules/signalmodule.c python/trunk/Modules/socketmodule.c python/trunk/Modules/spwdmodule.c python/trunk/Modules/stropmodule.c python/trunk/Modules/structmodule.c python/trunk/Modules/sunaudiodev.c python/trunk/Modules/svmodule.c python/trunk/Modules/symtablemodule.c python/trunk/Modules/syslogmodule.c python/trunk/Modules/termios.c python/trunk/Modules/threadmodule.c python/trunk/Modules/timemodule.c python/trunk/Modules/xxmodule.c python/trunk/Modules/zlibmodule.c python/trunk/PC/_subprocess.c python/trunk/PC/_winreg.c python/trunk/PC/msvcrtmodule.c python/trunk/PC/winsound.c 
python/trunk/Python/import.c python/trunk/Python/marshal.c python/trunk/Python/sysmodule.c Log: Check return result from Py_InitModule*(). This API can fail. Probably should be backported. Modified: python/trunk/Modules/_bsddb.c ============================================================================== --- python/trunk/Modules/_bsddb.c (original) +++ python/trunk/Modules/_bsddb.c Thu Jan 19 07:09:39 2006 @@ -5034,6 +5034,8 @@ /* Create the module and add the functions */ m = Py_InitModule(_bsddbModuleName, bsddb_methods); + if (m == NULL) + return; /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); Modified: python/trunk/Modules/_curses_panel.c ============================================================================== --- python/trunk/Modules/_curses_panel.c (original) +++ python/trunk/Modules/_curses_panel.c Thu Jan 19 07:09:39 2006 @@ -462,6 +462,8 @@ /* Create the module and add the functions */ m = Py_InitModule("_curses_panel", PyCurses_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); /* For exception _curses_panel.error */ Modified: python/trunk/Modules/_cursesmodule.c ============================================================================== --- python/trunk/Modules/_cursesmodule.c (original) +++ python/trunk/Modules/_cursesmodule.c Thu Jan 19 07:09:39 2006 @@ -2481,6 +2481,8 @@ /* Create the module and add the functions */ m = Py_InitModule("_curses", PyCurses_methods); + if (m == NULL) + return; /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); Modified: python/trunk/Modules/_elementtree.c ============================================================================== --- python/trunk/Modules/_elementtree.c (original) +++ python/trunk/Modules/_elementtree.c Thu Jan 19 07:09:39 2006 @@ -2590,6 +2590,8 @@ #endif m = Py_InitModule("_elementtree", _functions); + if (m == NULL) + return; /* python glue code */ Modified: python/trunk/Modules/_heapqmodule.c 
============================================================================== --- python/trunk/Modules/_heapqmodule.c (original) +++ python/trunk/Modules/_heapqmodule.c Thu Jan 19 07:09:39 2006 @@ -610,6 +610,8 @@ PyObject *m; m = Py_InitModule3("_heapq", heapq_methods, module_doc); + if (m == NULL) + return; PyModule_AddObject(m, "__about__", PyString_FromString(__about__)); } Modified: python/trunk/Modules/_localemodule.c ============================================================================== --- python/trunk/Modules/_localemodule.c (original) +++ python/trunk/Modules/_localemodule.c Thu Jan 19 07:09:39 2006 @@ -715,6 +715,8 @@ #endif m = Py_InitModule("_locale", PyLocale_Methods); + if (m == NULL) + return; d = PyModule_GetDict(m); Modified: python/trunk/Modules/_randommodule.c ============================================================================== --- python/trunk/Modules/_randommodule.c (original) +++ python/trunk/Modules/_randommodule.c Thu Jan 19 07:09:39 2006 @@ -573,6 +573,8 @@ if (PyType_Ready(&Random_Type) < 0) return; m = Py_InitModule3("_random", NULL, module_doc); + if (m == NULL) + return; Py_INCREF(&Random_Type); PyModule_AddObject(m, "Random", (PyObject *)&Random_Type); } Modified: python/trunk/Modules/_sre.c ============================================================================== --- python/trunk/Modules/_sre.c (original) +++ python/trunk/Modules/_sre.c Thu Jan 19 07:09:39 2006 @@ -3389,6 +3389,8 @@ Scanner_Type.ob_type = &PyType_Type; m = Py_InitModule("_" SRE_MODULE, _functions); + if (m == NULL) + return; d = PyModule_GetDict(m); x = PyInt_FromLong(SRE_MAGIC); Modified: python/trunk/Modules/_ssl.c ============================================================================== --- python/trunk/Modules/_ssl.c (original) +++ python/trunk/Modules/_ssl.c Thu Jan 19 07:09:39 2006 @@ -634,6 +634,8 @@ PySSL_Type.ob_type = &PyType_Type; m = Py_InitModule3("_ssl", PySSL_methods, module_doc); + if (m == NULL) + return; d = 
PyModule_GetDict(m); /* Load _socket module and its C API */ Modified: python/trunk/Modules/_testcapimodule.c ============================================================================== --- python/trunk/Modules/_testcapimodule.c (original) +++ python/trunk/Modules/_testcapimodule.c Thu Jan 19 07:09:39 2006 @@ -627,6 +627,8 @@ PyObject *m; m = Py_InitModule("_testcapi", TestMethods); + if (m == NULL) + return; PyModule_AddObject(m, "UCHAR_MAX", PyInt_FromLong(UCHAR_MAX)); PyModule_AddObject(m, "USHRT_MAX", PyInt_FromLong(USHRT_MAX)); Modified: python/trunk/Modules/_tkinter.c ============================================================================== --- python/trunk/Modules/_tkinter.c (original) +++ python/trunk/Modules/_tkinter.c Thu Jan 19 07:09:39 2006 @@ -3088,6 +3088,8 @@ #endif m = Py_InitModule("_tkinter", moduleMethods); + if (m == NULL) + return; d = PyModule_GetDict(m); Tkinter_TclError = PyErr_NewException("_tkinter.TclError", NULL, NULL); Modified: python/trunk/Modules/almodule.c ============================================================================== --- python/trunk/Modules/almodule.c (original) +++ python/trunk/Modules/almodule.c Thu Jan 19 07:09:39 2006 @@ -1996,6 +1996,8 @@ m = Py_InitModule4("al", al_methods, al_module_documentation, (PyObject*)NULL,PYTHON_API_VERSION); + if (m == NULL) + return; /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); Modified: python/trunk/Modules/arraymodule.c ============================================================================== --- python/trunk/Modules/arraymodule.c (original) +++ python/trunk/Modules/arraymodule.c Thu Jan 19 07:09:39 2006 @@ -2118,6 +2118,8 @@ Arraytype.ob_type = &PyType_Type; PyArrayIter_Type.ob_type = &PyType_Type; m = Py_InitModule3("array", a_methods, module_doc); + if (m == NULL) + return; Py_INCREF((PyObject *)&Arraytype); PyModule_AddObject(m, "ArrayType", (PyObject *)&Arraytype); Modified: python/trunk/Modules/audioop.c 
============================================================================== --- python/trunk/Modules/audioop.c (original) +++ python/trunk/Modules/audioop.c Thu Jan 19 07:09:39 2006 @@ -1374,6 +1374,8 @@ { PyObject *m, *d; m = Py_InitModule("audioop", audioop_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); AudioopError = PyErr_NewException("audioop.error", NULL, NULL); if (AudioopError != NULL) Modified: python/trunk/Modules/binascii.c ============================================================================== --- python/trunk/Modules/binascii.c (original) +++ python/trunk/Modules/binascii.c Thu Jan 19 07:09:39 2006 @@ -1335,6 +1335,8 @@ /* Create the module and add the functions */ m = Py_InitModule("binascii", binascii_module_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); x = PyString_FromString(doc_binascii); Modified: python/trunk/Modules/bsddbmodule.c ============================================================================== --- python/trunk/Modules/bsddbmodule.c (original) +++ python/trunk/Modules/bsddbmodule.c Thu Jan 19 07:09:39 2006 @@ -849,6 +849,8 @@ Bsddbtype.ob_type = &PyType_Type; m = Py_InitModule("bsddb185", bsddbmodule_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); BsddbError = PyErr_NewException("bsddb.error", NULL, NULL); if (BsddbError != NULL) Modified: python/trunk/Modules/bz2module.c ============================================================================== --- python/trunk/Modules/bz2module.c (original) +++ python/trunk/Modules/bz2module.c Thu Jan 19 07:09:39 2006 @@ -2192,6 +2192,8 @@ BZ2Decomp_Type.ob_type = &PyType_Type; m = Py_InitModule3("bz2", bz2_methods, bz2__doc__); + if (m == NULL) + return; PyModule_AddObject(m, "__author__", PyString_FromString(__author__)); Modified: python/trunk/Modules/cPickle.c ============================================================================== --- python/trunk/Modules/cPickle.c (original) +++ python/trunk/Modules/cPickle.c Thu Jan 19 
07:09:39 2006 @@ -5730,6 +5730,8 @@ m = Py_InitModule4("cPickle", cPickle_methods, cPickle_module_documentation, (PyObject*)NULL,PYTHON_API_VERSION); + if (m == NULL) + return; /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); Modified: python/trunk/Modules/cStringIO.c ============================================================================== --- python/trunk/Modules/cStringIO.c (original) +++ python/trunk/Modules/cStringIO.c Thu Jan 19 07:09:39 2006 @@ -716,6 +716,7 @@ m = Py_InitModule4("cStringIO", IO_methods, cStringIO_module_documentation, (PyObject*)NULL,PYTHON_API_VERSION); + if (m == NULL) return; /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); Modified: python/trunk/Modules/cdmodule.c ============================================================================== --- python/trunk/Modules/cdmodule.c (original) +++ python/trunk/Modules/cdmodule.c Thu Jan 19 07:09:39 2006 @@ -760,6 +760,8 @@ PyObject *m, *d; m = Py_InitModule("cd", CD_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); CdError = PyErr_NewException("cd.error", NULL, NULL); Modified: python/trunk/Modules/clmodule.c ============================================================================== --- python/trunk/Modules/clmodule.c (original) +++ python/trunk/Modules/clmodule.c Thu Jan 19 07:09:39 2006 @@ -963,6 +963,8 @@ PyObject *m, *d, *x; m = Py_InitModule("cl", cl_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); ClError = PyErr_NewException("cl.error", NULL, NULL); Modified: python/trunk/Modules/cmathmodule.c ============================================================================== --- python/trunk/Modules/cmathmodule.c (original) +++ python/trunk/Modules/cmathmodule.c Thu Jan 19 07:09:39 2006 @@ -417,6 +417,8 @@ PyObject *m; m = Py_InitModule3("cmath", cmath_methods, module_doc); + if (m == NULL) + return; PyModule_AddObject(m, "pi", PyFloat_FromDouble(atan(1.0) * 4.0)); Modified: 
python/trunk/Modules/collectionsmodule.c ============================================================================== --- python/trunk/Modules/collectionsmodule.c (original) +++ python/trunk/Modules/collectionsmodule.c Thu Jan 19 07:09:39 2006 @@ -1077,6 +1077,8 @@ PyObject *m; m = Py_InitModule3("collections", NULL, module_doc); + if (m == NULL) + return; if (PyType_Ready(&deque_type) < 0) return; Modified: python/trunk/Modules/datetimemodule.c ============================================================================== --- python/trunk/Modules/datetimemodule.c (original) +++ python/trunk/Modules/datetimemodule.c Thu Jan 19 07:09:39 2006 @@ -4615,6 +4615,8 @@ m = Py_InitModule3("datetime", module_methods, "Fast implementation of the datetime type."); + if (m == NULL) + return; if (PyType_Ready(&PyDateTime_DateType) < 0) return; Modified: python/trunk/Modules/dbmmodule.c ============================================================================== --- python/trunk/Modules/dbmmodule.c (original) +++ python/trunk/Modules/dbmmodule.c Thu Jan 19 07:09:39 2006 @@ -359,6 +359,8 @@ Dbmtype.ob_type = &PyType_Type; m = Py_InitModule("dbm", dbmmodule_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); if (DbmError == NULL) DbmError = PyErr_NewException("dbm.error", NULL, NULL); Modified: python/trunk/Modules/dlmodule.c ============================================================================== --- python/trunk/Modules/dlmodule.c (original) +++ python/trunk/Modules/dlmodule.c Thu Jan 19 07:09:39 2006 @@ -219,6 +219,8 @@ /* Create the module and add the functions */ m = Py_InitModule("dl", dl_methods); + if (m == NULL) + return; /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); Modified: python/trunk/Modules/errnomodule.c ============================================================================== --- python/trunk/Modules/errnomodule.c (original) +++ python/trunk/Modules/errnomodule.c Thu Jan 19 07:09:39 2006 @@ -57,6 +57,8 @@ { 
PyObject *m, *d, *de; m = Py_InitModule3("errno", errno_methods, errno__doc__); + if (m == NULL) + return; d = PyModule_GetDict(m); de = PyDict_New(); if (!d || !de || PyDict_SetItemString(d, "errorcode", de) < 0) Modified: python/trunk/Modules/fcntlmodule.c ============================================================================== --- python/trunk/Modules/fcntlmodule.c (original) +++ python/trunk/Modules/fcntlmodule.c Thu Jan 19 07:09:39 2006 @@ -583,6 +583,8 @@ /* Create the module and add the functions and documentation */ m = Py_InitModule3("fcntl", fcntl_methods, module_doc); + if (m == NULL) + return; /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); Modified: python/trunk/Modules/flmodule.c ============================================================================== --- python/trunk/Modules/flmodule.c (original) +++ python/trunk/Modules/flmodule.c Thu Jan 19 07:09:39 2006 @@ -2130,6 +2130,8 @@ initfl(void) { Py_InitModule("fl", forms_methods); + if (m == NULL) + return; foreground(); fl_init(); } Modified: python/trunk/Modules/fmmodule.c ============================================================================== --- python/trunk/Modules/fmmodule.c (original) +++ python/trunk/Modules/fmmodule.c Thu Jan 19 07:09:39 2006 @@ -258,5 +258,7 @@ initfm(void) { Py_InitModule("fm", fm_methods); + if (m == NULL) + return; fminit(); } Modified: python/trunk/Modules/fpectlmodule.c ============================================================================== --- python/trunk/Modules/fpectlmodule.c (original) +++ python/trunk/Modules/fpectlmodule.c Thu Jan 19 07:09:39 2006 @@ -265,6 +265,8 @@ { PyObject *m, *d; m = Py_InitModule("fpectl", fpectl_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); fpe_error = PyErr_NewException("fpectl.error", NULL, NULL); if (fpe_error != NULL) Modified: python/trunk/Modules/fpetestmodule.c ============================================================================== --- 
python/trunk/Modules/fpetestmodule.c (original) +++ python/trunk/Modules/fpetestmodule.c Thu Jan 19 07:09:39 2006 @@ -177,6 +177,8 @@ PyObject *m, *d; m = Py_InitModule("fpetest", fpetest_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); fpe_error = PyErr_NewException("fpetest.error", NULL, NULL); if (fpe_error != NULL) Modified: python/trunk/Modules/functionalmodule.c ============================================================================== --- python/trunk/Modules/functionalmodule.c (original) +++ python/trunk/Modules/functionalmodule.c Thu Jan 19 07:09:39 2006 @@ -263,6 +263,8 @@ }; m = Py_InitModule3("functional", module_methods, module_doc); + if (m == NULL) + return; for (i=0 ; typelist[i] != NULL ; i++) { if (PyType_Ready(typelist[i]) < 0) Modified: python/trunk/Modules/gcmodule.c ============================================================================== --- python/trunk/Modules/gcmodule.c (original) +++ python/trunk/Modules/gcmodule.c Thu Jan 19 07:09:39 2006 @@ -1158,6 +1158,8 @@ gc__doc__, NULL, PYTHON_API_VERSION); + if (m == NULL) + return; if (garbage == NULL) { garbage = PyList_New(0); Modified: python/trunk/Modules/gdbmmodule.c ============================================================================== --- python/trunk/Modules/gdbmmodule.c (original) +++ python/trunk/Modules/gdbmmodule.c Thu Jan 19 07:09:39 2006 @@ -512,6 +512,8 @@ m = Py_InitModule4("gdbm", dbmmodule_methods, gdbmmodule__doc__, (PyObject *)NULL, PYTHON_API_VERSION); + if (m == NULL) + return; d = PyModule_GetDict(m); DbmError = PyErr_NewException("gdbm.error", NULL, NULL); if (DbmError != NULL) { Modified: python/trunk/Modules/grpmodule.c ============================================================================== --- python/trunk/Modules/grpmodule.c (original) +++ python/trunk/Modules/grpmodule.c Thu Jan 19 07:09:39 2006 @@ -171,6 +171,8 @@ { PyObject *m, *d; m = Py_InitModule3("grp", grp_methods, grp__doc__); + if (m == NULL) + return; d = 
PyModule_GetDict(m); PyStructSequence_InitType(&StructGrpType, &struct_group_type_desc); PyDict_SetItemString(d, "struct_group", (PyObject *) &StructGrpType); Modified: python/trunk/Modules/imageop.c ============================================================================== --- python/trunk/Modules/imageop.c (original) +++ python/trunk/Modules/imageop.c Thu Jan 19 07:09:39 2006 @@ -776,6 +776,8 @@ { PyObject *m; m = Py_InitModule("imageop", imageop_methods); + if (m == NULL) + return; ImageopDict = PyModule_GetDict(m); ImageopError = PyErr_NewException("imageop.error", NULL, NULL); if (ImageopError != NULL) Modified: python/trunk/Modules/imgfile.c ============================================================================== --- python/trunk/Modules/imgfile.c (original) +++ python/trunk/Modules/imgfile.c Thu Jan 19 07:09:39 2006 @@ -492,6 +492,8 @@ { PyObject *m, *d; m = Py_InitModule("imgfile", imgfile_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); ImgfileError = PyErr_NewException("imgfile.error", NULL, NULL); if (ImgfileError != NULL) Modified: python/trunk/Modules/itertoolsmodule.c ============================================================================== --- python/trunk/Modules/itertoolsmodule.c (original) +++ python/trunk/Modules/itertoolsmodule.c Thu Jan 19 07:09:39 2006 @@ -2460,6 +2460,8 @@ teedataobject_type.ob_type = &PyType_Type; m = Py_InitModule3("itertools", module_methods, module_doc); + if (m == NULL) + return; for (i=0 ; typelist[i] != NULL ; i++) { if (PyType_Ready(typelist[i]) < 0) Modified: python/trunk/Modules/linuxaudiodev.c ============================================================================== --- python/trunk/Modules/linuxaudiodev.c (original) +++ python/trunk/Modules/linuxaudiodev.c Thu Jan 19 07:09:39 2006 @@ -491,6 +491,8 @@ PyObject *m; m = Py_InitModule("linuxaudiodev", linuxaudiodev_methods); + if (m == NULL) + return; LinuxAudioError = PyErr_NewException("linuxaudiodev.error", NULL, NULL); if 
(LinuxAudioError) Modified: python/trunk/Modules/mathmodule.c ============================================================================== --- python/trunk/Modules/mathmodule.c (original) +++ python/trunk/Modules/mathmodule.c Thu Jan 19 07:09:39 2006 @@ -355,6 +355,8 @@ PyObject *m, *d, *v; m = Py_InitModule3("math", math_methods, module_doc); + if (m == NULL) + goto finally; d = PyModule_GetDict(m); if (!(v = PyFloat_FromDouble(atan(1.0) * 4.0))) Modified: python/trunk/Modules/md5module.c ============================================================================== --- python/trunk/Modules/md5module.c (original) +++ python/trunk/Modules/md5module.c Thu Jan 19 07:09:39 2006 @@ -303,6 +303,8 @@ if (PyType_Ready(&MD5type) < 0) return; m = Py_InitModule3("_md5", md5_functions, module_doc); + if (m == NULL) + return; d = PyModule_GetDict(m); PyDict_SetItemString(d, "MD5Type", (PyObject *)&MD5type); PyModule_AddIntConstant(m, "digest_size", 16); Modified: python/trunk/Modules/mmapmodule.c ============================================================================== --- python/trunk/Modules/mmapmodule.c (original) +++ python/trunk/Modules/mmapmodule.c Thu Jan 19 07:09:39 2006 @@ -1092,6 +1092,8 @@ mmap_object_type.ob_type = &PyType_Type; module = Py_InitModule ("mmap", mmap_functions); + if (module == NULL) + return; dict = PyModule_GetDict (module); mmap_module_error = PyExc_EnvironmentError; Py_INCREF(mmap_module_error); Modified: python/trunk/Modules/nismodule.c ============================================================================== --- python/trunk/Modules/nismodule.c (original) +++ python/trunk/Modules/nismodule.c Thu Jan 19 07:09:39 2006 @@ -379,6 +379,8 @@ { PyObject *m, *d; m = Py_InitModule("nis", nis_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); NisError = PyErr_NewException("nis.error", NULL, NULL); if (NisError != NULL) Modified: python/trunk/Modules/operator.c 
============================================================================== --- python/trunk/Modules/operator.c (original) +++ python/trunk/Modules/operator.c Thu Jan 19 07:09:39 2006 @@ -578,6 +578,8 @@ /* Create the module and add the functions */ m = Py_InitModule4("operator", operator_methods, operator_doc, (PyObject*)NULL, PYTHON_API_VERSION); + if (m == NULL) + return; if (PyType_Ready(&itemgetter_type) < 0) return; Modified: python/trunk/Modules/ossaudiodev.c ============================================================================== --- python/trunk/Modules/ossaudiodev.c (original) +++ python/trunk/Modules/ossaudiodev.c Thu Jan 19 07:09:39 2006 @@ -963,6 +963,8 @@ PyObject *m; m = Py_InitModule("ossaudiodev", ossaudiodev_methods); + if (m == NULL) + return; OSSAudioError = PyErr_NewException("ossaudiodev.OSSAudioError", NULL, NULL); Modified: python/trunk/Modules/parsermodule.c ============================================================================== --- python/trunk/Modules/parsermodule.c (original) +++ python/trunk/Modules/parsermodule.c Thu Jan 19 07:09:39 2006 @@ -3148,6 +3148,8 @@ PyST_Type.ob_type = &PyType_Type; module = Py_InitModule("parser", parser_functions); + if (module == NULL) + return; if (parser_error == 0) parser_error = PyErr_NewException("parser.ParserError", NULL, NULL); Modified: python/trunk/Modules/posixmodule.c ============================================================================== --- python/trunk/Modules/posixmodule.c (original) +++ python/trunk/Modules/posixmodule.c Thu Jan 19 07:09:39 2006 @@ -7967,6 +7967,8 @@ m = Py_InitModule3(MODNAME, posix_methods, posix__doc__); + if (m == NULL) + return; /* Initialize environ dictionary */ v = convertenviron(); Modified: python/trunk/Modules/puremodule.c ============================================================================== --- python/trunk/Modules/puremodule.c (original) +++ python/trunk/Modules/puremodule.c Thu Jan 19 07:09:39 2006 @@ -952,6 +952,8 @@ PyObject 
*m, *d; m = Py_InitModule("pure", pure_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); /* this is bogus because we should be able to find this information Modified: python/trunk/Modules/pwdmodule.c ============================================================================== --- python/trunk/Modules/pwdmodule.c (original) +++ python/trunk/Modules/pwdmodule.c Thu Jan 19 07:09:39 2006 @@ -183,6 +183,8 @@ { PyObject *m; m = Py_InitModule3("pwd", pwd_methods, pwd__doc__); + if (m == NULL) + return; PyStructSequence_InitType(&StructPwdType, &struct_pwd_type_desc); Py_INCREF((PyObject *) &StructPwdType); Modified: python/trunk/Modules/pyexpat.c ============================================================================== --- python/trunk/Modules/pyexpat.c (original) +++ python/trunk/Modules/pyexpat.c Thu Jan 19 07:09:39 2006 @@ -1854,6 +1854,8 @@ /* Create the module and add the functions */ m = Py_InitModule3(MODULE_NAME, pyexpat_methods, pyexpat_module_documentation); + if (m == NULL) + return; /* Add some symbolic constants to the module */ if (ErrorObject == NULL) { Modified: python/trunk/Modules/readline.c ============================================================================== --- python/trunk/Modules/readline.c (original) +++ python/trunk/Modules/readline.c Thu Jan 19 07:09:39 2006 @@ -925,6 +925,8 @@ m = Py_InitModule4("readline", readline_methods, doc_module, (PyObject *)NULL, PYTHON_API_VERSION); + if (m == NULL) + return; PyOS_ReadlineFunctionPointer = call_readline; setup_readline(); Modified: python/trunk/Modules/regexmodule.c ============================================================================== --- python/trunk/Modules/regexmodule.c (original) +++ python/trunk/Modules/regexmodule.c Thu Jan 19 07:09:39 2006 @@ -653,6 +653,8 @@ Regextype.ob_type = &PyType_Type; m = Py_InitModule("regex", regex_global_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); if (PyErr_Warn(PyExc_DeprecationWarning, Modified: 
python/trunk/Modules/resource.c ============================================================================== --- python/trunk/Modules/resource.c (original) +++ python/trunk/Modules/resource.c Thu Jan 19 07:09:39 2006 @@ -234,6 +234,8 @@ /* Create the module and add the functions */ m = Py_InitModule("resource", resource_methods); + if (m == NULL) + return; /* Add some symbolic constants to the module */ if (ResourceError == NULL) { Modified: python/trunk/Modules/rgbimgmodule.c ============================================================================== --- python/trunk/Modules/rgbimgmodule.c (original) +++ python/trunk/Modules/rgbimgmodule.c Thu Jan 19 07:09:39 2006 @@ -756,6 +756,8 @@ { PyObject *m, *d; m = Py_InitModule("rgbimg", rgbimg_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); ImgfileError = PyErr_NewException("rgbimg.error", NULL, NULL); if (ImgfileError != NULL) Modified: python/trunk/Modules/selectmodule.c ============================================================================== --- python/trunk/Modules/selectmodule.c (original) +++ python/trunk/Modules/selectmodule.c Thu Jan 19 07:09:39 2006 @@ -662,6 +662,8 @@ { PyObject *m; m = Py_InitModule3("select", select_methods, module_doc); + if (m == NULL) + return; SelectError = PyErr_NewException("select.error", NULL, NULL); Py_INCREF(SelectError); Modified: python/trunk/Modules/sha256module.c ============================================================================== --- python/trunk/Modules/sha256module.c (original) +++ python/trunk/Modules/sha256module.c Thu Jan 19 07:09:39 2006 @@ -706,4 +706,6 @@ if (PyType_Ready(&SHA256type) < 0) return; m = Py_InitModule("_sha256", SHA_functions); + if (m == NULL) + return; } Modified: python/trunk/Modules/sha512module.c ============================================================================== --- python/trunk/Modules/sha512module.c (original) +++ python/trunk/Modules/sha512module.c Thu Jan 19 07:09:39 2006 @@ -772,6 +772,8 @@ if 
(PyType_Ready(&SHA512type) < 0) return; m = Py_InitModule("_sha512", SHA_functions); + if (m == NULL) + return; } #endif Modified: python/trunk/Modules/shamodule.c ============================================================================== --- python/trunk/Modules/shamodule.c (original) +++ python/trunk/Modules/shamodule.c Thu Jan 19 07:09:39 2006 @@ -590,6 +590,8 @@ if (PyType_Ready(&SHAtype) < 0) return; m = Py_InitModule("_sha", SHA_functions); + if (m == NULL) + return; /* Add some symbolic constants to the module */ insint("blocksize", 1); /* For future use, in case some hash Modified: python/trunk/Modules/signalmodule.c ============================================================================== --- python/trunk/Modules/signalmodule.c (original) +++ python/trunk/Modules/signalmodule.c Thu Jan 19 07:09:39 2006 @@ -317,6 +317,8 @@ /* Create the module and add the functions */ m = Py_InitModule3("signal", signal_methods, module_doc); + if (m == NULL) + return; /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); Modified: python/trunk/Modules/socketmodule.c ============================================================================== --- python/trunk/Modules/socketmodule.c (original) +++ python/trunk/Modules/socketmodule.c Thu Jan 19 07:09:39 2006 @@ -3871,6 +3871,8 @@ m = Py_InitModule3(PySocket_MODULE_NAME, socket_methods, socket_doc); + if (m == NULL) + return; socket_error = PyErr_NewException("socket.error", NULL, NULL); if (socket_error == NULL) Modified: python/trunk/Modules/spwdmodule.c ============================================================================== --- python/trunk/Modules/spwdmodule.c (original) +++ python/trunk/Modules/spwdmodule.c Thu Jan 19 07:09:39 2006 @@ -171,6 +171,8 @@ { PyObject *m; m=Py_InitModule3("spwd", spwd_methods, spwd__doc__); + if (m == NULL) + return; PyStructSequence_InitType(&StructSpwdType, &struct_spwd_type_desc); Py_INCREF((PyObject *) &StructSpwdType); PyModule_AddObject(m, 
"struct_spwd", (PyObject *) &StructSpwdType); Modified: python/trunk/Modules/stropmodule.c ============================================================================== --- python/trunk/Modules/stropmodule.c (original) +++ python/trunk/Modules/stropmodule.c Thu Jan 19 07:09:39 2006 @@ -1210,6 +1210,8 @@ int c, n; m = Py_InitModule4("strop", strop_methods, strop_module__doc__, (PyObject*)NULL, PYTHON_API_VERSION); + if (m == NULL) + return; /* Create 'whitespace' object */ n = 0; Modified: python/trunk/Modules/structmodule.c ============================================================================== --- python/trunk/Modules/structmodule.c (original) +++ python/trunk/Modules/structmodule.c Thu Jan 19 07:09:39 2006 @@ -1278,6 +1278,8 @@ /* Create the module and add the functions */ m = Py_InitModule4("struct", struct_methods, struct__doc__, (PyObject*)NULL, PYTHON_API_VERSION); + if (m == NULL) + return; /* Add some symbolic constants to the module */ if (StructError == NULL) { Modified: python/trunk/Modules/sunaudiodev.c ============================================================================== --- python/trunk/Modules/sunaudiodev.c (original) +++ python/trunk/Modules/sunaudiodev.c Thu Jan 19 07:09:39 2006 @@ -456,6 +456,8 @@ PyObject *m, *d; m = Py_InitModule("sunaudiodev", sunaudiodev_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); SunAudioError = PyErr_NewException("sunaudiodev.error", NULL, NULL); if (SunAudioError) Modified: python/trunk/Modules/svmodule.c ============================================================================== --- python/trunk/Modules/svmodule.c (original) +++ python/trunk/Modules/svmodule.c Thu Jan 19 07:09:39 2006 @@ -956,6 +956,8 @@ PyObject *m, *d; m = Py_InitModule("sv", sv_methods); + if (m == NULL) + return; d = PyModule_GetDict(m); SvError = PyErr_NewException("sv.error", NULL, NULL); Modified: python/trunk/Modules/symtablemodule.c ============================================================================== 
--- python/trunk/Modules/symtablemodule.c (original) +++ python/trunk/Modules/symtablemodule.c Thu Jan 19 07:09:39 2006 @@ -53,6 +53,8 @@ PyObject *m; m = Py_InitModule("_symtable", symtable_methods); + if (m == NULL) + return; PyModule_AddIntConstant(m, "USE", USE); PyModule_AddIntConstant(m, "DEF_GLOBAL", DEF_GLOBAL); PyModule_AddIntConstant(m, "DEF_LOCAL", DEF_LOCAL); Modified: python/trunk/Modules/syslogmodule.c ============================================================================== --- python/trunk/Modules/syslogmodule.c (original) +++ python/trunk/Modules/syslogmodule.c Thu Jan 19 07:09:39 2006 @@ -163,6 +163,8 @@ /* Create the module and add the functions */ m = Py_InitModule("syslog", syslog_methods); + if (m == NULL) + return; /* Add some symbolic constants to the module */ Modified: python/trunk/Modules/termios.c ============================================================================== --- python/trunk/Modules/termios.c (original) +++ python/trunk/Modules/termios.c Thu Jan 19 07:09:39 2006 @@ -910,6 +910,8 @@ m = Py_InitModule4("termios", termios_methods, termios__doc__, (PyObject *)NULL, PYTHON_API_VERSION); + if (m == NULL) + return; if (TermiosError == NULL) { TermiosError = PyErr_NewException("termios.error", NULL, NULL); Modified: python/trunk/Modules/threadmodule.c ============================================================================== --- python/trunk/Modules/threadmodule.c (original) +++ python/trunk/Modules/threadmodule.c Thu Jan 19 07:09:39 2006 @@ -638,6 +638,8 @@ /* Create the module and add the functions */ m = Py_InitModule3("thread", thread_methods, thread_doc); + if (m == NULL) + return; /* Add a symbolic constant */ d = PyModule_GetDict(m); Modified: python/trunk/Modules/timemodule.c ============================================================================== --- python/trunk/Modules/timemodule.c (original) +++ python/trunk/Modules/timemodule.c Thu Jan 19 07:09:39 2006 @@ -785,6 +785,8 @@ PyObject *m; char *p; m = 
Py_InitModule3("time", time_methods, module_doc); + if (m == NULL) + return; /* Accept 2-digit dates unless PYTHONY2K is set and non-empty */ p = Py_GETENV("PYTHONY2K"); Modified: python/trunk/Modules/xxmodule.c ============================================================================== --- python/trunk/Modules/xxmodule.c (original) +++ python/trunk/Modules/xxmodule.c Thu Jan 19 07:09:39 2006 @@ -352,6 +352,8 @@ /* Create the module and add the functions */ m = Py_InitModule3("xx", xx_methods, module_doc); + if (m == NULL) + return; /* Add some symbolic constants to the module */ if (ErrorObject == NULL) { Modified: python/trunk/Modules/zlibmodule.c ============================================================================== --- python/trunk/Modules/zlibmodule.c (original) +++ python/trunk/Modules/zlibmodule.c Thu Jan 19 07:09:39 2006 @@ -878,6 +878,8 @@ m = Py_InitModule4("zlib", zlib_methods, zlib_module_documentation, (PyObject*)NULL,PYTHON_API_VERSION); + if (m == NULL) + return; ZlibError = PyErr_NewException("zlib.error", NULL, NULL); if (ZlibError != NULL) { Modified: python/trunk/PC/_subprocess.c ============================================================================== --- python/trunk/PC/_subprocess.c (original) +++ python/trunk/PC/_subprocess.c Thu Jan 19 07:09:39 2006 @@ -553,6 +553,8 @@ sp_handle_as_number.nb_int = (unaryfunc) sp_handle_as_int; m = Py_InitModule("_subprocess", sp_functions); + if (m == NULL) + return; d = PyModule_GetDict(m); /* constants */ Modified: python/trunk/PC/_winreg.c ============================================================================== --- python/trunk/PC/_winreg.c (original) +++ python/trunk/PC/_winreg.c Thu Jan 19 07:09:39 2006 @@ -1459,6 +1459,8 @@ { PyObject *m, *d; m = Py_InitModule3("_winreg", winreg_methods, module_doc); + if (m == NULL) + return; d = PyModule_GetDict(m); PyHKEY_Type.ob_type = &PyType_Type; PyHKEY_Type.tp_doc = PyHKEY_doc; Modified: python/trunk/PC/msvcrtmodule.c 
============================================================================== --- python/trunk/PC/msvcrtmodule.c (original) +++ python/trunk/PC/msvcrtmodule.c Thu Jan 19 07:09:39 2006 @@ -221,6 +221,8 @@ initmsvcrt(void) { PyObject *m = Py_InitModule("msvcrt", msvcrt_functions); + if (m == NULL) + return; PyObject *d = PyModule_GetDict(m); /* constants for the locking() function's mode argument */ Modified: python/trunk/PC/winsound.c ============================================================================== --- python/trunk/PC/winsound.c (original) +++ python/trunk/PC/winsound.c Thu Jan 19 07:09:39 2006 @@ -220,6 +220,8 @@ PyObject *module = Py_InitModule3("winsound", sound_methods, sound_module_doc); + if (module == NULL) + return; PyObject *dict = PyModule_GetDict(module); ADD_DEFINE(SND_ASYNC); Modified: python/trunk/Python/import.c ============================================================================== --- python/trunk/Python/import.c (original) +++ python/trunk/Python/import.c Thu Jan 19 07:09:39 2006 @@ -2817,6 +2817,8 @@ m = Py_InitModule4("imp", imp_methods, doc_imp, NULL, PYTHON_API_VERSION); + if (m == NULL) + goto failure; d = PyModule_GetDict(m); if (setint(d, "SEARCH_ERROR", SEARCH_ERROR) < 0) goto failure; Modified: python/trunk/Python/marshal.c ============================================================================== --- python/trunk/Python/marshal.c (original) +++ python/trunk/Python/marshal.c Thu Jan 19 07:09:39 2006 @@ -1107,5 +1107,7 @@ PyMarshal_Init(void) { PyObject *mod = Py_InitModule("marshal", marshal_methods); + if (mod == NULL) + return; PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION); } Modified: python/trunk/Python/sysmodule.c ============================================================================== --- python/trunk/Python/sysmodule.c (original) +++ python/trunk/Python/sysmodule.c Thu Jan 19 07:09:39 2006 @@ -1027,6 +1027,8 @@ #endif m = Py_InitModule3("sys", sys_methods, sys_doc); + if (m == NULL) + 
return NULL; sysdict = PyModule_GetDict(m); { From python-checkins at python.org Thu Jan 19 08:09:10 2006 From: python-checkins at python.org (brett.cannon) Date: Thu, 19 Jan 2006 08:09:10 +0100 (CET) Subject: [Python-checkins] r42094 - python/trunk/Lib/test/test__locale.py Message-ID: <20060119070910.236211E4002@bag.python.org> Author: brett.cannon Date: Thu Jan 19 08:09:09 2006 New Revision: 42094 Modified: python/trunk/Lib/test/test__locale.py Log: Add a more informative error message for test_float_parsing so the failing locale can be known. Modified: python/trunk/Lib/test/test__locale.py ============================================================================== --- python/trunk/Lib/test/test__locale.py (original) +++ python/trunk/Lib/test/test__locale.py Thu Jan 19 08:09:09 2006 @@ -105,8 +105,10 @@ setlocale(LC_NUMERIC, loc) except Error: continue - self.assertEquals(int(eval('3.14') * 100), 314) - self.assertEquals(int(float('3.14') * 100), 314) + self.assertEquals(int(eval('3.14') * 100), 314, + "using eval('3.14') failed for %s" % loc) + self.assertEquals(int(float('3.14') * 100), 314, + "using float('3.14') failed for %s" % loc) From python-checkins at python.org Thu Jan 19 16:21:30 2006 From: python-checkins at python.org (tim.peters) Date: Thu, 19 Jan 2006 16:21:30 +0100 (CET) Subject: [Python-checkins] r42095 - python/trunk/PC/msvcrtmodule.c Message-ID: <20060119152130.CC4B71E4082@bag.python.org> Author: tim.peters Date: Thu Jan 19 16:21:30 2006 New Revision: 42095 Modified: python/trunk/PC/msvcrtmodule.c Log: initmsvcrt(): This no longer compiled on Windows, because a recent change inserted code before an auto declaration. 
Modified: python/trunk/PC/msvcrtmodule.c ============================================================================== --- python/trunk/PC/msvcrtmodule.c (original) +++ python/trunk/PC/msvcrtmodule.c Thu Jan 19 16:21:30 2006 @@ -220,10 +220,11 @@ PyMODINIT_FUNC initmsvcrt(void) { + PyObject *d; PyObject *m = Py_InitModule("msvcrt", msvcrt_functions); if (m == NULL) return; - PyObject *d = PyModule_GetDict(m); + d = PyModule_GetDict(m); /* constants for the locking() function's mode argument */ insertint(d, "LK_LOCK", _LK_LOCK); From python-checkins at python.org Thu Jan 19 16:25:09 2006 From: python-checkins at python.org (tim.peters) Date: Thu, 19 Jan 2006 16:25:09 +0100 (CET) Subject: [Python-checkins] r42096 - python/trunk/PC/winsound.c Message-ID: <20060119152509.B8C6C1E4002@bag.python.org> Author: tim.peters Date: Thu Jan 19 16:25:07 2006 New Revision: 42096 Modified: python/trunk/PC/winsound.c Log: initwinsound(): this no longer compiled on Windows, because code snuck in between auto declarations. Modified: python/trunk/PC/winsound.c ============================================================================== --- python/trunk/PC/winsound.c (original) +++ python/trunk/PC/winsound.c Thu Jan 19 16:25:07 2006 @@ -217,12 +217,13 @@ { OSVERSIONINFO version; + PyObject *dict; PyObject *module = Py_InitModule3("winsound", sound_methods, sound_module_doc); if (module == NULL) return; - PyObject *dict = PyModule_GetDict(module); + dict = PyModule_GetDict(module); ADD_DEFINE(SND_ASYNC); ADD_DEFINE(SND_NODEFAULT); From python-checkins at python.org Thu Jan 19 17:17:32 2006 From: python-checkins at python.org (martin.v.loewis) Date: Thu, 19 Jan 2006 17:17:32 +0100 (CET) Subject: [Python-checkins] r42097 - python/trunk/PCbuild/make_buildinfo.c Message-ID: <20060119161732.ABCF41E4007@bag.python.org> Author: martin.v.loewis Date: Thu Jan 19 17:17:31 2006 New Revision: 42097 Modified: python/trunk/PCbuild/make_buildinfo.c Log: Pass unquoted string to stat. 
Modified: python/trunk/PCbuild/make_buildinfo.c ============================================================================== --- python/trunk/PCbuild/make_buildinfo.c (original) +++ python/trunk/PCbuild/make_buildinfo.c Thu Jan 19 17:17:31 2006 @@ -37,11 +37,11 @@ type != REG_SZ) /* Registry corrupted */ return 0; - strcat(command, "bin\\subwcrev.exe\""); - if (_stat(command, &st) < 0) + strcat(command, "bin\\subwcrev.exe"); + if (_stat(command+1, &st) < 0) /* subwcrev.exe not part of the release */ return 0; - strcat(command, " .. ..\\Modules\\getbuildinfo.c getbuildinfo2.c"); + strcat(command, "\" .. ..\\Modules\\getbuildinfo.c getbuildinfo2.c"); puts(command); fflush(stdout); if (system(command) < 0) return 0; From python-checkins at python.org Fri Jan 20 04:30:37 2006 From: python-checkins at python.org (fred.drake) Date: Fri, 20 Jan 2006 04:30:37 +0100 (CET) Subject: [Python-checkins] r42098 - python/trunk/Doc/lib/libgetopt.tex Message-ID: <20060120033037.D4D171E4002@bag.python.org> Author: fred.drake Date: Fri Jan 20 04:30:36 2006 New Revision: 42098 Modified: python/trunk/Doc/lib/libgetopt.tex Log: add missing version identification Modified: python/trunk/Doc/lib/libgetopt.tex ============================================================================== --- python/trunk/Doc/lib/libgetopt.tex (original) +++ python/trunk/Doc/lib/libgetopt.tex Fri Jan 20 04:30:36 2006 @@ -65,6 +65,8 @@ If the first character of the option string is `+', or if the environment variable POSIXLY_CORRECT is set, then option processing stops as soon as a non-option argument is encountered. 
+ +\versionadded{2.3} \end{funcdesc} \begin{excdesc}{GetoptError} From python-checkins at python.org Fri Jan 20 04:31:17 2006 From: python-checkins at python.org (fred.drake) Date: Fri, 20 Jan 2006 04:31:17 +0100 (CET) Subject: [Python-checkins] r42099 - python/branches/release24-maint/Doc/lib/libgetopt.tex Message-ID: <20060120033117.A757A1E4002@bag.python.org> Author: fred.drake Date: Fri Jan 20 04:31:17 2006 New Revision: 42099 Modified: python/branches/release24-maint/Doc/lib/libgetopt.tex Log: add missing version identification Modified: python/branches/release24-maint/Doc/lib/libgetopt.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libgetopt.tex (original) +++ python/branches/release24-maint/Doc/lib/libgetopt.tex Fri Jan 20 04:31:17 2006 @@ -65,6 +65,8 @@ If the first character of the option string is `+', or if the environment variable POSIXLY_CORRECT is set, then option processing stops as soon as a non-option argument is encountered. + +\versionadded{2.3} \end{funcdesc} \begin{excdesc}{GetoptError} From python-checkins at python.org Fri Jan 20 10:07:39 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 10:07:39 +0100 (CET) Subject: [Python-checkins] r42100 - in python/trunk: Lib/locale.py Misc/NEWS Message-ID: <20060120090739.884851E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 10:07:35 2006 New Revision: 42100 Modified: python/trunk/Lib/locale.py python/trunk/Misc/NEWS Log: Bug #1371247: Update Windows LCIDs in locale.py. Modified: python/trunk/Lib/locale.py ============================================================================== --- python/trunk/Lib/locale.py (original) +++ python/trunk/Lib/locale.py Fri Jan 20 10:07:35 2006 @@ -275,7 +275,7 @@ """ code = normalize(localename) - if '@' in localename: + if '@' in code: # Deal with locale modifiers code, modifier = code.split('@') if modifier == 'euro' and '.' 
not in code: @@ -1210,46 +1210,193 @@ } # -# this maps windows language identifiers (as used on Windows 95 and -# earlier) to locale strings. +# This maps Windows language identifiers to locale strings. # -# NOTE: this mapping is incomplete. If your language is missing, please -# submit a bug report to Python bug manager, which you can find via: -# http://www.python.org/dev/ -# Make sure you include the missing language identifier and the suggested -# locale code. +# This list has been updated from +# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp +# to include every locale up to Windows XP. # windows_locale = { - 0x0404: "zh_TW", # Chinese (Taiwan) - 0x0804: "zh_CN", # Chinese (PRC) + 0x0436: "af_ZA", # Afrikaans + 0x041c: "sq_AL", # Albanian + 0x0401: "ar_SA", # Arabic - Saudi Arabia + 0x0801: "ar_IQ", # Arabic - Iraq + 0x0c01: "ar_EG", # Arabic - Egypt + 0x1001: "ar_LY", # Arabic - Libya + 0x1401: "ar_DZ", # Arabic - Algeria + 0x1801: "ar_MA", # Arabic - Morocco + 0x1c01: "ar_TN", # Arabic - Tunisia + 0x2001: "ar_OM", # Arabic - Oman + 0x2401: "ar_YE", # Arabic - Yemen + 0x2801: "ar_SY", # Arabic - Syria + 0x2c01: "ar_JO", # Arabic - Jordan + 0x3001: "ar_LB", # Arabic - Lebanon + 0x3401: "ar_KW", # Arabic - Kuwait + 0x3801: "ar_AE", # Arabic - United Arab Emirates + 0x3c01: "ar_BH", # Arabic - Bahrain + 0x4001: "ar_QA", # Arabic - Qatar + 0x042b: "hy_AM", # Armenian + 0x042c: "az_AZ", # Azeri Latin + 0x082c: "az_AZ", # Azeri - Cyrillic + 0x042d: "eu_ES", # Basque + 0x0423: "be_BY", # Belarusian + 0x0445: "bn_IN", # Begali + 0x201a: "bs_BA", # Bosnian + 0x141a: "bs_BA", # Bosnian - Cyrillic + 0x047e: "br_FR", # Breton - France + 0x0402: "bg_BG", # Bulgarian + 0x0403: "ca_ES", # Catalan + 0x0004: "zh_CHS",# Chinese - Simplified + 0x0404: "zh_TW", # Chinese - Taiwan + 0x0804: "zh_CN", # Chinese - PRC + 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R. 
+ 0x1004: "zh_SG", # Chinese - Singapore + 0x1404: "zh_MO", # Chinese - Macao S.A.R. + 0x7c04: "zh_CHT",# Chinese - Traditional + 0x041a: "hr_HR", # Croatian + 0x101a: "hr_BA", # Croatian - Bosnia + 0x0405: "cs_CZ", # Czech 0x0406: "da_DK", # Danish - 0x0413: "nl_NL", # Dutch (Netherlands) - 0x0409: "en_US", # English (United States) - 0x0809: "en_UK", # English (United Kingdom) - 0x0c09: "en_AU", # English (Australian) - 0x1009: "en_CA", # English (Canadian) - 0x1409: "en_NZ", # English (New Zealand) - 0x1809: "en_IE", # English (Ireland) - 0x1c09: "en_ZA", # English (South Africa) + 0x048c: "gbz_AF",# Dari - Afghanistan + 0x0465: "div_MV",# Divehi - Maldives + 0x0413: "nl_NL", # Dutch - The Netherlands + 0x0813: "nl_BE", # Dutch - Belgium + 0x0409: "en_US", # English - United States + 0x0809: "en_GB", # English - United Kingdom + 0x0c09: "en_AU", # English - Australia + 0x1009: "en_CA", # English - Canada + 0x1409: "en_NZ", # English - New Zealand + 0x1809: "en_IE", # English - Ireland + 0x1c09: "en_ZA", # English - South Africa + 0x2009: "en_JA", # English - Jamaica + 0x2409: "en_CB", # English - Carribbean + 0x2809: "en_BZ", # English - Belize + 0x2c09: "en_TT", # English - Trinidad + 0x3009: "en_ZW", # English - Zimbabwe + 0x3409: "en_PH", # English - Phillippines + 0x0425: "et_EE", # Estonian + 0x0438: "fo_FO", # Faroese + 0x0464: "fil_PH",# Filipino 0x040b: "fi_FI", # Finnish - 0x040c: "fr_FR", # French (Standard) - 0x080c: "fr_BE", # French (Belgian) - 0x0c0c: "fr_CA", # French (Canadian) - 0x100c: "fr_CH", # French (Switzerland) - 0x0407: "de_DE", # German (Standard) + 0x040c: "fr_FR", # French - France + 0x080c: "fr_BE", # French - Belgium + 0x0c0c: "fr_CA", # French - Canada + 0x100c: "fr_CH", # French - Switzerland + 0x140c: "fr_LU", # French - Luxembourg + 0x180c: "fr_MC", # French - Monaco + 0x0462: "fy_NL", # Frisian - Netherlands + 0x0456: "gl_ES", # Galician + 0x0437: "ka_GE", # Georgian + 0x0407: "de_DE", # German - Germany + 0x0807: "de_CH", # 
German - Switzerland + 0x0c07: "de_AT", # German - Austria + 0x1007: "de_LU", # German - Luxembourg + 0x1407: "de_LI", # German - Liechtenstein 0x0408: "el_GR", # Greek - 0x040d: "iw_IL", # Hebrew + 0x0447: "gu_IN", # Gujarati + 0x040d: "he_IL", # Hebrew + 0x0439: "hi_IN", # Hindi + 0x040e: "hu_HU", # Hungarian 0x040f: "is_IS", # Icelandic - 0x0410: "it_IT", # Italian (Standard) - 0x0411: "ja_JA", # Japanese - 0x0414: "no_NO", # Norwegian (Bokmal) - 0x0816: "pt_PT", # Portuguese (Standard) - 0x0c0a: "es_ES", # Spanish (Modern Sort) - 0x0441: "sw_KE", # Swahili (Kenya) - 0x041d: "sv_SE", # Swedish - 0x081d: "sv_FI", # Swedish (Finland) + 0x0421: "id_ID", # Indonesian + 0x045d: "iu_CA", # Inuktitut + 0x085d: "iu_CA", # Inuktitut - Latin + 0x083c: "ga_IE", # Irish - Ireland + 0x0434: "xh_ZA", # Xhosa - South Africa + 0x0435: "zu_ZA", # Zulu + 0x0410: "it_IT", # Italian - Italy + 0x0810: "it_CH", # Italian - Switzerland + 0x0411: "ja_JP", # Japanese + 0x044b: "kn_IN", # Kannada - India + 0x043f: "kk_KZ", # Kazakh + 0x0457: "kok_IN",# Konkani + 0x0412: "ko_KR", # Korean + 0x0440: "ky_KG", # Kyrgyz + 0x0426: "lv_LV", # Latvian + 0x0427: "lt_LT", # Lithuanian + 0x046e: "lb_LU", # Luxembourgish + 0x042f: "mk_MK", # FYRO Macedonian + 0x043e: "ms_MY", # Malay - Malaysia + 0x083e: "ms_BN", # Malay - Brunei + 0x044c: "ml_IN", # Malayalam - India + 0x043a: "mt_MT", # Maltese + 0x0481: "mi_NZ", # Maori + 0x047a: "arn_CL",# Mapudungun + 0x044e: "mr_IN", # Marathi + 0x047c: "moh_CA",# Mohawk - Canada + 0x0450: "mn_MN", # Mongolian + 0x0461: "ne_NP", # Nepali + 0x0414: "nb_NO", # Norwegian - Bokmal + 0x0814: "nn_NO", # Norwegian - Nynorsk + 0x0482: "oc_FR", # Occitan - France + 0x0448: "or_IN", # Oriya - India + 0x0463: "ps_AF", # Pashto - Afghanistan + 0x0429: "fa_IR", # Persian + 0x0415: "pl_PL", # Polish + 0x0416: "pt_BR", # Portuguese - Brazil + 0x0816: "pt_PT", # Portuguese - Portugal + 0x0446: "pa_IN", # Punjabi + 0x046b: "quz_BO",# Quechua (Bolivia) + 0x086b: "quz_EC",# 
Quechua (Ecuador) + 0x0c6b: "quz_PE",# Quechua (Peru) + 0x0418: "ro_RO", # Romanian - Romania + 0x0417: "rm_CH", # Raeto-Romanese + 0x0419: "ru_RU", # Russian + 0x243b: "smn_FI",# Sami Finland + 0x103b: "smj_NO",# Sami Norway + 0x143b: "smj_SE",# Sami Sweden + 0x043b: "se_NO", # Sami Northern Norway + 0x083b: "se_SE", # Sami Northern Sweden + 0x0c3b: "se_FI", # Sami Northern Finland + 0x203b: "sms_FI",# Sami Skolt + 0x183b: "sma_NO",# Sami Southern Norway + 0x1c3b: "sma_SE",# Sami Southern Sweden + 0x044f: "sa_IN", # Sanskrit + 0x0c1a: "sr_SP", # Serbian - Cyrillic + 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic + 0x081a: "sr_SP", # Serbian - Latin + 0x181a: "sr_BA", # Serbian - Bosnia Latin + 0x046c: "ns_ZA", # Northern Sotho + 0x0432: "tn_ZA", # Setswana - Southern Africa + 0x041b: "sk_SK", # Slovak + 0x0424: "sl_SI", # Slovenian + 0x040a: "es_ES", # Spanish - Spain + 0x080a: "es_MX", # Spanish - Mexico + 0x0c0a: "es_ES", # Spanish - Spain (Modern) + 0x100a: "es_GT", # Spanish - Guatemala + 0x140a: "es_CR", # Spanish - Costa Rica + 0x180a: "es_PA", # Spanish - Panama + 0x1c0a: "es_DO", # Spanish - Dominican Republic + 0x200a: "es_VE", # Spanish - Venezuela + 0x240a: "es_CO", # Spanish - Colombia + 0x280a: "es_PE", # Spanish - Peru + 0x2c0a: "es_AR", # Spanish - Argentina + 0x300a: "es_EC", # Spanish - Ecuador + 0x340a: "es_CL", # Spanish - Chile + 0x380a: "es_UR", # Spanish - Uruguay + 0x3c0a: "es_PY", # Spanish - Paraguay + 0x400a: "es_BO", # Spanish - Bolivia + 0x440a: "es_SV", # Spanish - El Salvador + 0x480a: "es_HN", # Spanish - Honduras + 0x4c0a: "es_NI", # Spanish - Nicaragua + 0x500a: "es_PR", # Spanish - Puerto Rico + 0x0441: "sw_KE", # Swahili + 0x041d: "sv_SE", # Swedish - Sweden + 0x081d: "sv_FI", # Swedish - Finland + 0x045a: "syr_SY",# Syriac + 0x0449: "ta_IN", # Tamil + 0x0444: "tt_RU", # Tatar + 0x044a: "te_IN", # Telugu + 0x041e: "th_TH", # Thai 0x041f: "tr_TR", # Turkish + 0x0422: "uk_UA", # Ukrainian + 0x0420: "ur_PK", # Urdu + 0x0820: "ur_IN", 
# Urdu - India + 0x0443: "uz_UZ", # Uzbek - Latin + 0x0843: "uz_UZ", # Uzbek - Cyrillic + 0x042a: "vi_VN", # Vietnamese + 0x0452: "cy_GB", # Welsh } def _print_locale(): Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Fri Jan 20 10:07:35 2006 @@ -337,12 +337,14 @@ Library ------- +- Bug #1371247: Update Windows locale identifiers in locale.py. + - Bug #1394565: SimpleHTTPServer now doesn't choke on query parameters any more. - + - Bug #1403410: The warnings module now doesn't get confused when it can't find out the module name it generates a warning for. - + - Patch #1177307: Added a new codec utf_8_sig for UTF-8 with a BOM signature. - Patch #1157027: cookielib mishandles RFC 2109 cookies in Netscape mode From python-checkins at python.org Fri Jan 20 10:07:56 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 10:07:56 +0100 (CET) Subject: [Python-checkins] r42101 - in python/branches/release24-maint: Lib/locale.py Misc/NEWS Message-ID: <20060120090756.632E01E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 10:07:55 2006 New Revision: 42101 Modified: python/branches/release24-maint/Lib/locale.py python/branches/release24-maint/Misc/NEWS Log: Bug #1371247: Update Windows LCIDs in locale.py. Modified: python/branches/release24-maint/Lib/locale.py ============================================================================== --- python/branches/release24-maint/Lib/locale.py (original) +++ python/branches/release24-maint/Lib/locale.py Fri Jan 20 10:07:55 2006 @@ -262,7 +262,7 @@ """ code = normalize(localename) - if '@' in localename: + if '@' in code: # Deal with locale modifiers code, modifier = code.split('@') if modifier == 'euro' and '.' not in code: @@ -661,46 +661,193 @@ } # -# this maps windows language identifiers (as used on Windows 95 and -# earlier) to locale strings. 
+# This maps Windows language identifiers to locale strings. # -# NOTE: this mapping is incomplete. If your language is missing, please -# submit a bug report to Python bug manager, which you can find via: -# http://www.python.org/dev/ -# Make sure you include the missing language identifier and the suggested -# locale code. +# This list has been updated from +# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp +# to include every locale up to Windows XP. # windows_locale = { - 0x0404: "zh_TW", # Chinese (Taiwan) - 0x0804: "zh_CN", # Chinese (PRC) + 0x0436: "af_ZA", # Afrikaans + 0x041c: "sq_AL", # Albanian + 0x0401: "ar_SA", # Arabic - Saudi Arabia + 0x0801: "ar_IQ", # Arabic - Iraq + 0x0c01: "ar_EG", # Arabic - Egypt + 0x1001: "ar_LY", # Arabic - Libya + 0x1401: "ar_DZ", # Arabic - Algeria + 0x1801: "ar_MA", # Arabic - Morocco + 0x1c01: "ar_TN", # Arabic - Tunisia + 0x2001: "ar_OM", # Arabic - Oman + 0x2401: "ar_YE", # Arabic - Yemen + 0x2801: "ar_SY", # Arabic - Syria + 0x2c01: "ar_JO", # Arabic - Jordan + 0x3001: "ar_LB", # Arabic - Lebanon + 0x3401: "ar_KW", # Arabic - Kuwait + 0x3801: "ar_AE", # Arabic - United Arab Emirates + 0x3c01: "ar_BH", # Arabic - Bahrain + 0x4001: "ar_QA", # Arabic - Qatar + 0x042b: "hy_AM", # Armenian + 0x042c: "az_AZ", # Azeri Latin + 0x082c: "az_AZ", # Azeri - Cyrillic + 0x042d: "eu_ES", # Basque + 0x0423: "be_BY", # Belarusian + 0x0445: "bn_IN", # Begali + 0x201a: "bs_BA", # Bosnian + 0x141a: "bs_BA", # Bosnian - Cyrillic + 0x047e: "br_FR", # Breton - France + 0x0402: "bg_BG", # Bulgarian + 0x0403: "ca_ES", # Catalan + 0x0004: "zh_CHS",# Chinese - Simplified + 0x0404: "zh_TW", # Chinese - Taiwan + 0x0804: "zh_CN", # Chinese - PRC + 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R. + 0x1004: "zh_SG", # Chinese - Singapore + 0x1404: "zh_MO", # Chinese - Macao S.A.R. 
+ 0x7c04: "zh_CHT",# Chinese - Traditional + 0x041a: "hr_HR", # Croatian + 0x101a: "hr_BA", # Croatian - Bosnia + 0x0405: "cs_CZ", # Czech 0x0406: "da_DK", # Danish - 0x0413: "nl_NL", # Dutch (Netherlands) - 0x0409: "en_US", # English (United States) - 0x0809: "en_UK", # English (United Kingdom) - 0x0c09: "en_AU", # English (Australian) - 0x1009: "en_CA", # English (Canadian) - 0x1409: "en_NZ", # English (New Zealand) - 0x1809: "en_IE", # English (Ireland) - 0x1c09: "en_ZA", # English (South Africa) + 0x048c: "gbz_AF",# Dari - Afghanistan + 0x0465: "div_MV",# Divehi - Maldives + 0x0413: "nl_NL", # Dutch - The Netherlands + 0x0813: "nl_BE", # Dutch - Belgium + 0x0409: "en_US", # English - United States + 0x0809: "en_GB", # English - United Kingdom + 0x0c09: "en_AU", # English - Australia + 0x1009: "en_CA", # English - Canada + 0x1409: "en_NZ", # English - New Zealand + 0x1809: "en_IE", # English - Ireland + 0x1c09: "en_ZA", # English - South Africa + 0x2009: "en_JA", # English - Jamaica + 0x2409: "en_CB", # English - Carribbean + 0x2809: "en_BZ", # English - Belize + 0x2c09: "en_TT", # English - Trinidad + 0x3009: "en_ZW", # English - Zimbabwe + 0x3409: "en_PH", # English - Phillippines + 0x0425: "et_EE", # Estonian + 0x0438: "fo_FO", # Faroese + 0x0464: "fil_PH",# Filipino 0x040b: "fi_FI", # Finnish - 0x040c: "fr_FR", # French (Standard) - 0x080c: "fr_BE", # French (Belgian) - 0x0c0c: "fr_CA", # French (Canadian) - 0x100c: "fr_CH", # French (Switzerland) - 0x0407: "de_DE", # German (Standard) + 0x040c: "fr_FR", # French - France + 0x080c: "fr_BE", # French - Belgium + 0x0c0c: "fr_CA", # French - Canada + 0x100c: "fr_CH", # French - Switzerland + 0x140c: "fr_LU", # French - Luxembourg + 0x180c: "fr_MC", # French - Monaco + 0x0462: "fy_NL", # Frisian - Netherlands + 0x0456: "gl_ES", # Galician + 0x0437: "ka_GE", # Georgian + 0x0407: "de_DE", # German - Germany + 0x0807: "de_CH", # German - Switzerland + 0x0c07: "de_AT", # German - Austria + 0x1007: "de_LU", # German 
- Luxembourg + 0x1407: "de_LI", # German - Liechtenstein 0x0408: "el_GR", # Greek - 0x040d: "iw_IL", # Hebrew + 0x0447: "gu_IN", # Gujarati + 0x040d: "he_IL", # Hebrew + 0x0439: "hi_IN", # Hindi + 0x040e: "hu_HU", # Hungarian 0x040f: "is_IS", # Icelandic - 0x0410: "it_IT", # Italian (Standard) - 0x0411: "ja_JA", # Japanese - 0x0414: "no_NO", # Norwegian (Bokmal) - 0x0816: "pt_PT", # Portuguese (Standard) - 0x0c0a: "es_ES", # Spanish (Modern Sort) - 0x0441: "sw_KE", # Swahili (Kenya) - 0x041d: "sv_SE", # Swedish - 0x081d: "sv_FI", # Swedish (Finland) + 0x0421: "id_ID", # Indonesian + 0x045d: "iu_CA", # Inuktitut + 0x085d: "iu_CA", # Inuktitut - Latin + 0x083c: "ga_IE", # Irish - Ireland + 0x0434: "xh_ZA", # Xhosa - South Africa + 0x0435: "zu_ZA", # Zulu + 0x0410: "it_IT", # Italian - Italy + 0x0810: "it_CH", # Italian - Switzerland + 0x0411: "ja_JP", # Japanese + 0x044b: "kn_IN", # Kannada - India + 0x043f: "kk_KZ", # Kazakh + 0x0457: "kok_IN",# Konkani + 0x0412: "ko_KR", # Korean + 0x0440: "ky_KG", # Kyrgyz + 0x0426: "lv_LV", # Latvian + 0x0427: "lt_LT", # Lithuanian + 0x046e: "lb_LU", # Luxembourgish + 0x042f: "mk_MK", # FYRO Macedonian + 0x043e: "ms_MY", # Malay - Malaysia + 0x083e: "ms_BN", # Malay - Brunei + 0x044c: "ml_IN", # Malayalam - India + 0x043a: "mt_MT", # Maltese + 0x0481: "mi_NZ", # Maori + 0x047a: "arn_CL",# Mapudungun + 0x044e: "mr_IN", # Marathi + 0x047c: "moh_CA",# Mohawk - Canada + 0x0450: "mn_MN", # Mongolian + 0x0461: "ne_NP", # Nepali + 0x0414: "nb_NO", # Norwegian - Bokmal + 0x0814: "nn_NO", # Norwegian - Nynorsk + 0x0482: "oc_FR", # Occitan - France + 0x0448: "or_IN", # Oriya - India + 0x0463: "ps_AF", # Pashto - Afghanistan + 0x0429: "fa_IR", # Persian + 0x0415: "pl_PL", # Polish + 0x0416: "pt_BR", # Portuguese - Brazil + 0x0816: "pt_PT", # Portuguese - Portugal + 0x0446: "pa_IN", # Punjabi + 0x046b: "quz_BO",# Quechua (Bolivia) + 0x086b: "quz_EC",# Quechua (Ecuador) + 0x0c6b: "quz_PE",# Quechua (Peru) + 0x0418: "ro_RO", # Romanian - 
Romania + 0x0417: "rm_CH", # Raeto-Romanese + 0x0419: "ru_RU", # Russian + 0x243b: "smn_FI",# Sami Finland + 0x103b: "smj_NO",# Sami Norway + 0x143b: "smj_SE",# Sami Sweden + 0x043b: "se_NO", # Sami Northern Norway + 0x083b: "se_SE", # Sami Northern Sweden + 0x0c3b: "se_FI", # Sami Northern Finland + 0x203b: "sms_FI",# Sami Skolt + 0x183b: "sma_NO",# Sami Southern Norway + 0x1c3b: "sma_SE",# Sami Southern Sweden + 0x044f: "sa_IN", # Sanskrit + 0x0c1a: "sr_SP", # Serbian - Cyrillic + 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic + 0x081a: "sr_SP", # Serbian - Latin + 0x181a: "sr_BA", # Serbian - Bosnia Latin + 0x046c: "ns_ZA", # Northern Sotho + 0x0432: "tn_ZA", # Setswana - Southern Africa + 0x041b: "sk_SK", # Slovak + 0x0424: "sl_SI", # Slovenian + 0x040a: "es_ES", # Spanish - Spain + 0x080a: "es_MX", # Spanish - Mexico + 0x0c0a: "es_ES", # Spanish - Spain (Modern) + 0x100a: "es_GT", # Spanish - Guatemala + 0x140a: "es_CR", # Spanish - Costa Rica + 0x180a: "es_PA", # Spanish - Panama + 0x1c0a: "es_DO", # Spanish - Dominican Republic + 0x200a: "es_VE", # Spanish - Venezuela + 0x240a: "es_CO", # Spanish - Colombia + 0x280a: "es_PE", # Spanish - Peru + 0x2c0a: "es_AR", # Spanish - Argentina + 0x300a: "es_EC", # Spanish - Ecuador + 0x340a: "es_CL", # Spanish - Chile + 0x380a: "es_UR", # Spanish - Uruguay + 0x3c0a: "es_PY", # Spanish - Paraguay + 0x400a: "es_BO", # Spanish - Bolivia + 0x440a: "es_SV", # Spanish - El Salvador + 0x480a: "es_HN", # Spanish - Honduras + 0x4c0a: "es_NI", # Spanish - Nicaragua + 0x500a: "es_PR", # Spanish - Puerto Rico + 0x0441: "sw_KE", # Swahili + 0x041d: "sv_SE", # Swedish - Sweden + 0x081d: "sv_FI", # Swedish - Finland + 0x045a: "syr_SY",# Syriac + 0x0449: "ta_IN", # Tamil + 0x0444: "tt_RU", # Tatar + 0x044a: "te_IN", # Telugu + 0x041e: "th_TH", # Thai 0x041f: "tr_TR", # Turkish + 0x0422: "uk_UA", # Ukrainian + 0x0420: "ur_PK", # Urdu + 0x0820: "ur_IN", # Urdu - India + 0x0443: "uz_UZ", # Uzbek - Latin + 0x0843: "uz_UZ", # Uzbek - 
Cyrillic + 0x042a: "vi_VN", # Vietnamese + 0x0452: "cy_GB", # Welsh } def _print_locale(): Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Fri Jan 20 10:07:55 2006 @@ -68,12 +68,14 @@ Library ------- +- Bug #1371247: Update Windows locale identifiers in locale.py. + - Bug #1394565: SimpleHTTPServer now doesn't choke on query parameters any more. - + - Bug #1403410: The warnings module now doesn't get confused when it can't find out the module name it generates a warning for. - + - Patch #1117398: cookielib.LWPCookieJar and .MozillaCookieJar now raise LoadError as documented, instead of IOError. For compatibility, LoadError subclasses IOError. From python-checkins at python.org Fri Jan 20 10:14:38 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 10:14:38 +0100 (CET) Subject: [Python-checkins] r42102 - in python/trunk: Doc/lib/libstdtypes.tex Misc/NEWS Message-ID: <20060120091438.CBEC81E400B@bag.python.org> Author: georg.brandl Date: Fri Jan 20 10:14:36 2006 New Revision: 42102 Modified: python/trunk/Doc/lib/libstdtypes.tex python/trunk/Misc/NEWS Log: Bug #1396471: Document that Windows' ftell() can return invalid values for text files with UNIX-style line endings. Modified: python/trunk/Doc/lib/libstdtypes.tex ============================================================================== --- python/trunk/Doc/lib/libstdtypes.tex (original) +++ python/trunk/Doc/lib/libstdtypes.tex Fri Jan 20 10:14:36 2006 @@ -625,7 +625,7 @@ \begin{methoddesc}[string]{find}{sub\optional{, start\optional{, end}}} Return the lowest index in the string where substring \var{sub} is found, such that \var{sub} is contained in the range [\var{start}, -\var{end}). Optional arguments \var{start} and \var{end} are +\var{end}]. 
Optional arguments \var{start} and \var{end} are interpreted as in slice notation. Return \code{-1} if \var{sub} is not found. \end{methoddesc} @@ -1598,6 +1598,10 @@ \begin{methoddesc}[file]{tell}{} Return the file's current position, like \code{stdio}'s \cfunction{ftell()}. + + \note{On Windows, \method{tell()} can return illegal values (after an + \cfunction{fgets()}) when reading files with \UNIX{}-style line-endings. + Use binary mode (\code{'rb'}) to circumvent this problem.} \end{methoddesc} \begin{methoddesc}[file]{truncate}{\optional{size}} Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Fri Jan 20 10:14:36 2006 @@ -727,6 +727,9 @@ Documentation ------------- +- Bug #1396471: Document that Windows' ftell() can return invalid + values for text files with UNIX-style line endings. + - Bug #1274828: Document os.path.splitunc(). - Bug #1190204: Clarify which directories are searched by site.py. From python-checkins at python.org Fri Jan 20 10:14:43 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 10:14:43 +0100 (CET) Subject: [Python-checkins] r42103 - in python/branches/release24-maint: Doc/lib/libstdtypes.tex Misc/NEWS Message-ID: <20060120091443.124501E400B@bag.python.org> Author: georg.brandl Date: Fri Jan 20 10:14:41 2006 New Revision: 42103 Modified: python/branches/release24-maint/Doc/lib/libstdtypes.tex python/branches/release24-maint/Misc/NEWS Log: Bug #1396471: Document that Windows' ftell() can return invalid values for text files with UNIX-style line endings. 
Modified: python/branches/release24-maint/Doc/lib/libstdtypes.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libstdtypes.tex (original) +++ python/branches/release24-maint/Doc/lib/libstdtypes.tex Fri Jan 20 10:14:41 2006 @@ -625,7 +625,7 @@ \begin{methoddesc}[string]{find}{sub\optional{, start\optional{, end}}} Return the lowest index in the string where substring \var{sub} is found, such that \var{sub} is contained in the range [\var{start}, -\var{end}). Optional arguments \var{start} and \var{end} are +\var{end}]. Optional arguments \var{start} and \var{end} are interpreted as in slice notation. Return \code{-1} if \var{sub} is not found. \end{methoddesc} @@ -1598,6 +1598,10 @@ \begin{methoddesc}[file]{tell}{} Return the file's current position, like \code{stdio}'s \cfunction{ftell()}. + + \note{On Windows, \method{tell()} can return illegal values (after an + \cfunction{fgets()}) when reading files with \UNIX{}-style line-endings. + Use binary mode (\code{'rb'}) to circumvent this problem.} \end{methoddesc} \begin{methoddesc}[file]{truncate}{\optional{size}} Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Fri Jan 20 10:14:41 2006 @@ -346,6 +346,9 @@ Documentation ------------- +- Bug #1396471: Document that Windows' ftell() can return invalid + values for text files with UNIX-style line endings. + - Bug #1274828: Document os.path.splitunc(). - Bug #1190204: Clarify which directories are searched by site.py. 
From python-checkins at python.org Fri Jan 20 10:34:32 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 10:34:32 +0100 (CET) Subject: [Python-checkins] r42104 - in python/trunk: Doc/lib/libdl.tex Misc/NEWS Message-ID: <20060120093432.60DB51E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 10:34:29 2006 New Revision: 42104 Modified: python/trunk/Doc/lib/libdl.tex python/trunk/Misc/NEWS Log: Bug #1402224: Add warning to dl docs about crashes. Modified: python/trunk/Doc/lib/libdl.tex ============================================================================== --- python/trunk/Doc/lib/libdl.tex (original) +++ python/trunk/Doc/lib/libdl.tex Fri Jan 20 10:34:29 2006 @@ -10,6 +10,10 @@ \UNIX{} platforms for handling dynamically linked libraries. It allows the program to call arbitrary functions in such a library. +\warning{The \module{dl} module bypasses the Python type system and +error handling. If used incorrectly it may cause segmentation faults, +crashes or other incorrect behaviour.} + \note{This module will not work unless \code{sizeof(int) == sizeof(long) == sizeof(char *)} If this is not the case, \exception{SystemError} will be raised on Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Fri Jan 20 10:34:29 2006 @@ -727,6 +727,8 @@ Documentation ------------- +- Bug #1402224: Add warning to dl docs about crashes. + - Bug #1396471: Document that Windows' ftell() can return invalid values for text files with UNIX-style line endings. 
From python-checkins at python.org Fri Jan 20 10:35:05 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 10:35:05 +0100 (CET) Subject: [Python-checkins] r42105 - in python/branches/release24-maint: Doc/lib/libdl.tex Misc/NEWS Message-ID: <20060120093505.C37AB1E400F@bag.python.org> Author: georg.brandl Date: Fri Jan 20 10:35:04 2006 New Revision: 42105 Modified: python/branches/release24-maint/Doc/lib/libdl.tex python/branches/release24-maint/Misc/NEWS Log: Bug #1402224: Add warning to dl docs about crashes. Modified: python/branches/release24-maint/Doc/lib/libdl.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libdl.tex (original) +++ python/branches/release24-maint/Doc/lib/libdl.tex Fri Jan 20 10:35:04 2006 @@ -10,6 +10,10 @@ \UNIX{} platforms for handling dynamically linked libraries. It allows the program to call arbitrary functions in such a library. +\warning{The \module{dl} module bypasses the Python type system and +error handling. If used incorrectly it may cause segmentation faults, +crashes or other incorrect behaviour.} + \note{This module will not work unless \code{sizeof(int) == sizeof(long) == sizeof(char *)} If this is not the case, \exception{SystemError} will be raised on Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Fri Jan 20 10:35:04 2006 @@ -346,6 +346,8 @@ Documentation ------------- +- Bug #1402224: Add warning to dl docs about crashes. + - Bug #1396471: Document that Windows' ftell() can return invalid values for text files with UNIX-style line endings. From mal at egenix.com Fri Jan 20 13:28:04 2006 From: mal at egenix.com (M.-A. 
Lemburg) Date: Fri, 20 Jan 2006 13:28:04 +0100 Subject: [Python-checkins] r42100 - in python/trunk: Lib/locale.py Misc/NEWS In-Reply-To: <20060120090739.884851E4002@bag.python.org> References: <20060120090739.884851E4002@bag.python.org> Message-ID: <43D0D754.6050404@egenix.com> georg.brandl wrote: > Author: georg.brandl > Date: Fri Jan 20 10:07:35 2006 > New Revision: 42100 > > Modified: > python/trunk/Lib/locale.py > python/trunk/Misc/NEWS > Log: > Bug #1371247: Update Windows LCIDs in locale.py. > > > > Modified: python/trunk/Lib/locale.py > ============================================================================== > --- python/trunk/Lib/locale.py (original) > +++ python/trunk/Lib/locale.py Fri Jan 20 10:07:35 2006 Please re-add the note regarding the bug report. > # > -# this maps windows language identifiers (as used on Windows 95 and > -# earlier) to locale strings. > +# This maps Windows language identifiers to locale strings. > # > -# NOTE: this mapping is incomplete. If your language is missing, please > -# submit a bug report to Python bug manager, which you can find via: > -# http://www.python.org/dev/ > -# Make sure you include the missing language identifier and the suggested > -# locale code. > +# This list has been updated from > +# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp > +# to include every locale up to Windows XP. > # -- Marc-Andre Lemburg eGenix.com Professional Python Services directly from the Source (#1, Jan 20 2006) >>> Python/Zope Consulting and Support ... http://www.egenix.com/ >>> mxODBC.Zope.Database.Adapter ... http://zope.egenix.com/ >>> mxODBC, mxDateTime, mxTextTools ... http://python.egenix.com/ ________________________________________________________________________ ::: Try mxODBC.Zope.DA for Windows,Linux,Solaris,FreeBSD for free ! 
:::: From python-checkins at python.org Fri Jan 20 14:38:27 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 14:38:27 +0100 (CET) Subject: [Python-checkins] r42106 - python/trunk/Lib/locale.py Message-ID: <20060120133827.76AB91E4007@bag.python.org> Author: georg.brandl Date: Fri Jan 20 14:38:26 2006 New Revision: 42106 Modified: python/trunk/Lib/locale.py Log: Readd bug report note. Modified: python/trunk/Lib/locale.py ============================================================================== --- python/trunk/Lib/locale.py (original) +++ python/trunk/Lib/locale.py Fri Jan 20 14:38:26 2006 @@ -1216,6 +1216,12 @@ # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp # to include every locale up to Windows XP. # +# NOTE: this mapping is incomplete. If your language is missing, please +# submit a bug report to Python bug manager, which you can find via: +# http://www.python.org/dev/ +# Make sure you include the missing language identifier and the suggested +# locale code. +# windows_locale = { 0x0436: "af_ZA", # Afrikaans From python-checkins at python.org Fri Jan 20 14:38:42 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 14:38:42 +0100 (CET) Subject: [Python-checkins] r42107 - python/branches/release24-maint/Lib/locale.py Message-ID: <20060120133842.48BE71E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 14:38:41 2006 New Revision: 42107 Modified: python/branches/release24-maint/Lib/locale.py Log: Readd bug report note. Modified: python/branches/release24-maint/Lib/locale.py ============================================================================== --- python/branches/release24-maint/Lib/locale.py (original) +++ python/branches/release24-maint/Lib/locale.py Fri Jan 20 14:38:41 2006 @@ -667,6 +667,12 @@ # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp # to include every locale up to Windows XP. 
# +# NOTE: this mapping is incomplete. If your language is missing, please +# submit a bug report to Python bug manager, which you can find via: +# http://www.python.org/dev/ +# Make sure you include the missing language identifier and the suggested +# locale code. +# windows_locale = { 0x0436: "af_ZA", # Afrikaans From python-checkins at python.org Fri Jan 20 18:24:29 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 18:24:29 +0100 (CET) Subject: [Python-checkins] r42108 - in python/trunk: Lib/urlparse.py Misc/NEWS Message-ID: <20060120172429.D51BD1E4050@bag.python.org> Author: georg.brandl Date: Fri Jan 20 18:24:23 2006 New Revision: 42108 Modified: python/trunk/Lib/urlparse.py python/trunk/Misc/NEWS Log: Bug #1407902: Added support for sftp:// URIs to urlparse. Modified: python/trunk/Lib/urlparse.py ============================================================================== --- python/trunk/Lib/urlparse.py (original) +++ python/trunk/Lib/urlparse.py Fri Jan 20 18:24:23 2006 @@ -9,22 +9,22 @@ # A classification of schemes ('' means apply by default) uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', - 'wais', 'file', 'https', 'shttp', 'mms', - 'prospero', 'rtsp', 'rtspu', ''] + 'wais', 'file', 'https', 'shttp', 'mms', + 'prospero', 'rtsp', 'rtspu', '', 'sftp'] uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', - 'imap', 'wais', 'file', 'mms', 'https', 'shttp', - 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', - 'svn', 'svn+ssh'] + 'imap', 'wais', 'file', 'mms', 'https', 'shttp', + 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', + 'svn', 'svn+ssh', 'sftp'] non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', - 'telnet', 'wais', 'imap', 'snews', 'sip'] + 'telnet', 'wais', 'imap', 'snews', 'sip'] uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', - 'https', 'shttp', 'rtsp', 'rtspu', 'sip', - 'mms', ''] + 'https', 'shttp', 'rtsp', 'rtspu', 'sip', + 'mms', '', 'sftp'] uses_query = ['http', 'wais', 'imap', 
'https', 'shttp', 'mms', - 'gopher', 'rtsp', 'rtspu', 'sip', ''] + 'gopher', 'rtsp', 'rtspu', 'sip', ''] uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', - 'nntp', 'wais', 'https', 'shttp', 'snews', - 'file', 'prospero', ''] + 'nntp', 'wais', 'https', 'shttp', 'snews', + 'file', 'prospero', ''] # Characters valid in scheme names scheme_chars = ('abcdefghijklmnopqrstuvwxyz' Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Fri Jan 20 18:24:23 2006 @@ -337,6 +337,8 @@ Library ------- +- Bug #1407902: Added support for sftp:// URIs to urlparse. + - Bug #1371247: Update Windows locale identifiers in locale.py. - Bug #1394565: SimpleHTTPServer now doesn't choke on query parameters From python-checkins at python.org Fri Jan 20 18:24:37 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 18:24:37 +0100 (CET) Subject: [Python-checkins] r42109 - in python/branches/release24-maint: Lib/urlparse.py Misc/NEWS Message-ID: <20060120172437.C25801E405E@bag.python.org> Author: georg.brandl Date: Fri Jan 20 18:24:34 2006 New Revision: 42109 Modified: python/branches/release24-maint/Lib/urlparse.py python/branches/release24-maint/Misc/NEWS Log: Bug #1407902: Added support for sftp:// URIs to urlparse. 
Modified: python/branches/release24-maint/Lib/urlparse.py ============================================================================== --- python/branches/release24-maint/Lib/urlparse.py (original) +++ python/branches/release24-maint/Lib/urlparse.py Fri Jan 20 18:24:34 2006 @@ -9,22 +9,22 @@ # A classification of schemes ('' means apply by default) uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', - 'wais', 'file', 'https', 'shttp', 'mms', - 'prospero', 'rtsp', 'rtspu', ''] + 'wais', 'file', 'https', 'shttp', 'mms', + 'prospero', 'rtsp', 'rtspu', '', 'sftp'] uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', - 'imap', 'wais', 'file', 'mms', 'https', 'shttp', - 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', - 'svn', 'svn+ssh'] + 'imap', 'wais', 'file', 'mms', 'https', 'shttp', + 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', + 'svn', 'svn+ssh', 'sftp'] non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', - 'telnet', 'wais', 'imap', 'snews', 'sip'] + 'telnet', 'wais', 'imap', 'snews', 'sip'] uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', - 'https', 'shttp', 'rtsp', 'rtspu', 'sip', - 'mms', ''] + 'https', 'shttp', 'rtsp', 'rtspu', 'sip', + 'mms', '', 'sftp'] uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', - 'gopher', 'rtsp', 'rtspu', 'sip', ''] + 'gopher', 'rtsp', 'rtspu', 'sip', ''] uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', - 'nntp', 'wais', 'https', 'shttp', 'snews', - 'file', 'prospero', ''] + 'nntp', 'wais', 'https', 'shttp', 'snews', + 'file', 'prospero', ''] # Characters valid in scheme names scheme_chars = ('abcdefghijklmnopqrstuvwxyz' Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Fri Jan 20 18:24:34 2006 @@ -68,6 +68,8 @@ Library ------- +- Bug #1407902: Added support for sftp:// URIs to urlparse. 
+ - Bug #1371247: Update Windows locale identifiers in locale.py. - Bug #1394565: SimpleHTTPServer now doesn't choke on query parameters From python-checkins at python.org Fri Jan 20 18:48:54 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 18:48:54 +0100 (CET) Subject: [Python-checkins] r42110 - python/trunk/Lib/test/test_unicode.py Message-ID: <20060120174854.F3A181E4007@bag.python.org> Author: georg.brandl Date: Fri Jan 20 18:48:54 2006 New Revision: 42110 Modified: python/trunk/Lib/test/test_unicode.py Log: Checkin the test of patch #1400181. Modified: python/trunk/Lib/test/test_unicode.py ============================================================================== --- python/trunk/Lib/test/test_unicode.py (original) +++ python/trunk/Lib/test/test_unicode.py Fri Jan 20 18:48:54 2006 @@ -411,6 +411,20 @@ return u'\u1234' self.assertEqual('%s' % Wrapper(), u'\u1234') + def test_format_float(self): + try: + import locale + orig_locale = locale.setlocale(locale.LC_ALL) + locale.setlocale(locale.LC_ALL, 'de_DE') + except (ImportError, locale.Error): + return # skip if we can't set locale + + try: + # should not format with a comma, but always with C locale + self.assertEqual(u'1.0', u'%.1f' % 1.0) + finally: + locale.setlocale(locale.LC_ALL, orig_locale) + def test_constructor(self): # unicode(obj) tests (this maps to PyObject_Unicode() at C level) From python-checkins at python.org Fri Jan 20 18:48:58 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 18:48:58 +0100 (CET) Subject: [Python-checkins] r42111 - python/branches/release24-maint/Lib/test/test_unicode.py Message-ID: <20060120174858.09ECF1E4007@bag.python.org> Author: georg.brandl Date: Fri Jan 20 18:48:57 2006 New Revision: 42111 Modified: python/branches/release24-maint/Lib/test/test_unicode.py Log: Checkin the test of patch #1400181. 
Modified: python/branches/release24-maint/Lib/test/test_unicode.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_unicode.py (original) +++ python/branches/release24-maint/Lib/test/test_unicode.py Fri Jan 20 18:48:57 2006 @@ -390,6 +390,20 @@ self.assertEqual('%c' % u'a', u'a') + def test_format_float(self): + try: + import locale + orig_locale = locale.setlocale(locale.LC_ALL) + locale.setlocale(locale.LC_ALL, 'de_DE') + except (ImportError, locale.Error): + return # skip if we can't set locale + + try: + # should not format with a comma, but always with C locale + self.assertEqual(u'1.0', u'%.1f' % 1.0) + finally: + locale.setlocale(locale.LC_ALL, orig_locale) + def test_constructor(self): # unicode(obj) tests (this maps to PyObject_Unicode() at C level) From python-checkins at python.org Fri Jan 20 18:51:38 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 18:51:38 +0100 (CET) Subject: [Python-checkins] r42112 - python/trunk/Doc/ref/ref5.tex Message-ID: <20060120175138.4AB461E400F@bag.python.org> Author: georg.brandl Date: Fri Jan 20 18:51:37 2006 New Revision: 42112 Modified: python/trunk/Doc/ref/ref5.tex Log: Add markup to a "Python Standard Library" doc reference I added for bug 839585. Modified: python/trunk/Doc/ref/ref5.tex ============================================================================== --- python/trunk/Doc/ref/ref5.tex (original) +++ python/trunk/Doc/ref/ref5.tex Fri Jan 20 18:51:37 2006 @@ -780,8 +780,9 @@ In addition to performing the modulo operation on numbers, the \code{\%} operator is also overloaded by string and unicode objects to perform string formatting (also known as interpolation). The syntax for string -formatting is described in the Python Library Reference, section -``Sequence Types''. +formatting is described in the +\citetitle[../lib/typesseq-strings.html]{Python Library Reference}, +section ``Sequence Types''. 
\deprecated{2.3}{The floor division operator, the modulo operator, and the \function{divmod()} function are no longer defined for complex From python-checkins at python.org Fri Jan 20 18:51:41 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 18:51:41 +0100 (CET) Subject: [Python-checkins] r42113 - python/branches/release24-maint/Doc/ref/ref5.tex Message-ID: <20060120175141.41DA81E400B@bag.python.org> Author: georg.brandl Date: Fri Jan 20 18:51:40 2006 New Revision: 42113 Modified: python/branches/release24-maint/Doc/ref/ref5.tex Log: Add markup to a "Python Standard Library" doc reference I added for bug 839585. Modified: python/branches/release24-maint/Doc/ref/ref5.tex ============================================================================== --- python/branches/release24-maint/Doc/ref/ref5.tex (original) +++ python/branches/release24-maint/Doc/ref/ref5.tex Fri Jan 20 18:51:40 2006 @@ -780,8 +780,9 @@ In addition to performing the modulo operation on numbers, the \code{\%} operator is also overloaded by string and unicode objects to perform string formatting (also known as interpolation). The syntax for string -formatting is described in the Python Library Reference, section -``Sequence Types''. +formatting is described in the +\citetitle[../lib/typesseq-strings.html]{Python Library Reference}, +section ``Sequence Types''. 
\deprecated{2.3}{The floor division operator, the modulo operator, and the \function{divmod()} function are no longer defined for complex From python-checkins at python.org Fri Jan 20 18:53:28 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 18:53:28 +0100 (CET) Subject: [Python-checkins] r42114 - python/trunk/Python/getargs.c Message-ID: <20060120175328.B6B021E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 18:53:27 2006 New Revision: 42114 Modified: python/trunk/Python/getargs.c Log: typo Modified: python/trunk/Python/getargs.c ============================================================================== --- python/trunk/Python/getargs.c (original) +++ python/trunk/Python/getargs.c Fri Jan 20 18:53:27 2006 @@ -1286,7 +1286,7 @@ } /* required arguments missing from args can be supplied by keyword - arguments; set len to the number of posiitional arguments, and, + arguments; set len to the number of positional arguments, and, if that's less than the minimum required, add in the number of required arguments that are supplied by keywords */ len = nargs; From python-checkins at python.org Fri Jan 20 18:55:00 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 18:55:00 +0100 (CET) Subject: [Python-checkins] r42115 - python/trunk/Lib/unittest.py Message-ID: <20060120175500.7A7401E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 18:55:00 2006 New Revision: 42115 Modified: python/trunk/Lib/unittest.py Log: Patch #1388073: Make unittest.TestCase easier to subclass Modified: python/trunk/Lib/unittest.py ============================================================================== --- python/trunk/Lib/unittest.py (original) +++ python/trunk/Lib/unittest.py Fri Jan 20 18:55:00 2006 @@ -201,9 +201,9 @@ not have a method with the specified name. 
""" try: - self.__testMethodName = methodName + self._testMethodName = methodName testMethod = getattr(self, methodName) - self.__testMethodDoc = testMethod.__doc__ + self._testMethodDoc = testMethod.__doc__ except AttributeError: raise ValueError, "no such test method in %s: %s" % \ (self.__class__, methodName) @@ -229,30 +229,30 @@ The default implementation of this method returns the first line of the specified test method's docstring. """ - doc = self.__testMethodDoc + doc = self._testMethodDoc return doc and doc.split("\n")[0].strip() or None def id(self): - return "%s.%s" % (_strclass(self.__class__), self.__testMethodName) + return "%s.%s" % (_strclass(self.__class__), self._testMethodName) def __str__(self): - return "%s (%s)" % (self.__testMethodName, _strclass(self.__class__)) + return "%s (%s)" % (self._testMethodName, _strclass(self.__class__)) def __repr__(self): return "<%s testMethod=%s>" % \ - (_strclass(self.__class__), self.__testMethodName) + (_strclass(self.__class__), self._testMethodName) def run(self, result=None): if result is None: result = self.defaultTestResult() result.startTest(self) - testMethod = getattr(self, self.__testMethodName) + testMethod = getattr(self, self._testMethodName) try: try: self.setUp() except KeyboardInterrupt: raise except: - result.addError(self, self.__exc_info()) + result.addError(self, self._exc_info()) return ok = False @@ -260,18 +260,18 @@ testMethod() ok = True except self.failureException: - result.addFailure(self, self.__exc_info()) + result.addFailure(self, self._exc_info()) except KeyboardInterrupt: raise except: - result.addError(self, self.__exc_info()) + result.addError(self, self._exc_info()) try: self.tearDown() except KeyboardInterrupt: raise except: - result.addError(self, self.__exc_info()) + result.addError(self, self._exc_info()) ok = False if ok: result.addSuccess(self) finally: @@ -283,10 +283,10 @@ def debug(self): """Run the test without collecting errors in a TestResult""" self.setUp() - 
getattr(self, self.__testMethodName)() + getattr(self, self._testMethodName)() self.tearDown() - def __exc_info(self): + def _exc_info(self): """Return a version of sys.exc_info() with the traceback frame minimised; usually the top level of the traceback frame is not needed. From python-checkins at python.org Fri Jan 20 18:55:03 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 18:55:03 +0100 (CET) Subject: [Python-checkins] r42116 - python/branches/release24-maint/Lib/unittest.py Message-ID: <20060120175503.4C0C71E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 18:55:02 2006 New Revision: 42116 Modified: python/branches/release24-maint/Lib/unittest.py Log: Patch #1388073: Make unittest.TestCase easier to subclass Modified: python/branches/release24-maint/Lib/unittest.py ============================================================================== --- python/branches/release24-maint/Lib/unittest.py (original) +++ python/branches/release24-maint/Lib/unittest.py Fri Jan 20 18:55:02 2006 @@ -201,9 +201,9 @@ not have a method with the specified name. """ try: - self.__testMethodName = methodName + self._testMethodName = methodName testMethod = getattr(self, methodName) - self.__testMethodDoc = testMethod.__doc__ + self._testMethodDoc = testMethod.__doc__ except AttributeError: raise ValueError, "no such test method in %s: %s" % \ (self.__class__, methodName) @@ -229,30 +229,30 @@ The default implementation of this method returns the first line of the specified test method's docstring. 
""" - doc = self.__testMethodDoc + doc = self._testMethodDoc return doc and doc.split("\n")[0].strip() or None def id(self): - return "%s.%s" % (_strclass(self.__class__), self.__testMethodName) + return "%s.%s" % (_strclass(self.__class__), self._testMethodName) def __str__(self): - return "%s (%s)" % (self.__testMethodName, _strclass(self.__class__)) + return "%s (%s)" % (self._testMethodName, _strclass(self.__class__)) def __repr__(self): return "<%s testMethod=%s>" % \ - (_strclass(self.__class__), self.__testMethodName) + (_strclass(self.__class__), self._testMethodName) def run(self, result=None): if result is None: result = self.defaultTestResult() result.startTest(self) - testMethod = getattr(self, self.__testMethodName) + testMethod = getattr(self, self._testMethodName) try: try: self.setUp() except KeyboardInterrupt: raise except: - result.addError(self, self.__exc_info()) + result.addError(self, self._exc_info()) return ok = False @@ -260,18 +260,18 @@ testMethod() ok = True except self.failureException: - result.addFailure(self, self.__exc_info()) + result.addFailure(self, self._exc_info()) except KeyboardInterrupt: raise except: - result.addError(self, self.__exc_info()) + result.addError(self, self._exc_info()) try: self.tearDown() except KeyboardInterrupt: raise except: - result.addError(self, self.__exc_info()) + result.addError(self, self._exc_info()) ok = False if ok: result.addSuccess(self) finally: @@ -283,10 +283,10 @@ def debug(self): """Run the test without collecting errors in a TestResult""" self.setUp() - getattr(self, self.__testMethodName)() + getattr(self, self._testMethodName)() self.tearDown() - def __exc_info(self): + def _exc_info(self): """Return a version of sys.exc_info() with the traceback frame minimised; usually the top level of the traceback frame is not needed. 
From guido at python.org Fri Jan 20 19:27:00 2006 From: guido at python.org (Guido van Rossum) Date: Fri, 20 Jan 2006 10:27:00 -0800 Subject: [Python-checkins] r42109 - in python/branches/release24-maint: Lib/urlparse.py Misc/NEWS In-Reply-To: <20060120172437.C25801E405E@bag.python.org> References: <20060120172437.C25801E405E@bag.python.org> Message-ID: This sounds awfully close to a feature. It's a slippery slope. On 1/20/06, georg.brandl wrote: > Author: georg.brandl > Date: Fri Jan 20 18:24:34 2006 > New Revision: 42109 > > Modified: > python/branches/release24-maint/Lib/urlparse.py > python/branches/release24-maint/Misc/NEWS > Log: > Bug #1407902: Added support for sftp:// URIs to urlparse. > > > Modified: python/branches/release24-maint/Lib/urlparse.py > ============================================================================== > --- python/branches/release24-maint/Lib/urlparse.py (original) > +++ python/branches/release24-maint/Lib/urlparse.py Fri Jan 20 18:24:34 2006 > @@ -9,22 +9,22 @@ > > # A classification of schemes ('' means apply by default) > uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', > - 'wais', 'file', 'https', 'shttp', 'mms', > - 'prospero', 'rtsp', 'rtspu', ''] > + 'wais', 'file', 'https', 'shttp', 'mms', > + 'prospero', 'rtsp', 'rtspu', '', 'sftp'] > uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', > - 'imap', 'wais', 'file', 'mms', 'https', 'shttp', > - 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', > - 'svn', 'svn+ssh'] > + 'imap', 'wais', 'file', 'mms', 'https', 'shttp', > + 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', > + 'svn', 'svn+ssh', 'sftp'] > non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', > - 'telnet', 'wais', 'imap', 'snews', 'sip'] > + 'telnet', 'wais', 'imap', 'snews', 'sip'] > uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', > - 'https', 'shttp', 'rtsp', 'rtspu', 'sip', > - 'mms', ''] > + 'https', 'shttp', 'rtsp', 'rtspu', 'sip', > + 'mms', '', 'sftp'] > uses_query = ['http', 
'wais', 'imap', 'https', 'shttp', 'mms', > - 'gopher', 'rtsp', 'rtspu', 'sip', ''] > + 'gopher', 'rtsp', 'rtspu', 'sip', ''] > uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', > - 'nntp', 'wais', 'https', 'shttp', 'snews', > - 'file', 'prospero', ''] > + 'nntp', 'wais', 'https', 'shttp', 'snews', > + 'file', 'prospero', ''] > > # Characters valid in scheme names > scheme_chars = ('abcdefghijklmnopqrstuvwxyz' > > Modified: python/branches/release24-maint/Misc/NEWS > ============================================================================== > --- python/branches/release24-maint/Misc/NEWS (original) > +++ python/branches/release24-maint/Misc/NEWS Fri Jan 20 18:24:34 2006 > @@ -68,6 +68,8 @@ > Library > ------- > > +- Bug #1407902: Added support for sftp:// URIs to urlparse. > + > - Bug #1371247: Update Windows locale identifiers in locale.py. > > - Bug #1394565: SimpleHTTPServer now doesn't choke on query parameters > _______________________________________________ > Python-checkins mailing list > Python-checkins at python.org > http://mail.python.org/mailman/listinfo/python-checkins > -- --Guido van Rossum (home page: http://www.python.org/~guido/) From python-checkins at python.org Fri Jan 20 19:28:04 2006 From: python-checkins at python.org (vinay.sajip) Date: Fri, 20 Jan 2006 19:28:04 +0100 (CET) Subject: [Python-checkins] r42117 - python/trunk/Lib/logging/config.py Message-ID: <20060120182804.49D5F1E4002@bag.python.org> Author: vinay.sajip Date: Fri Jan 20 19:28:03 2006 New Revision: 42117 Modified: python/trunk/Lib/logging/config.py Log: Added the ability to specify a class attribute in Formatter configuration. Contributed by Shane Hathaway. 
Modified: python/trunk/Lib/logging/config.py ============================================================================== --- python/trunk/Lib/logging/config.py (original) +++ python/trunk/Lib/logging/config.py Fri Jan 20 19:28:03 2006 @@ -86,6 +86,21 @@ logging._releaseLock() +def _resolve(name): + """Resolve a dotted name to a global object.""" + name = string.split(name, '.') + used = name.pop(0) + found = __import__(used) + for n in name: + used = used + '.' + n + try: + found = getattr(found, n) + except AttributeError: + __import__(used) + found = getattr(found, n) + return found + + def _create_formatters(cp): """Create and return formatters""" flist = cp.get("formatters", "keys") @@ -104,7 +119,12 @@ dfs = cp.get(sectname, "datefmt", 1) else: dfs = None - f = logging.Formatter(fs, dfs) + c = logging.Formatter + if "class" in opts: + class_name = cp.get(sectname, "class") + if class_name: + c = _resolve(class_name) + f = c(fs, dfs) formatters[form] = f return formatters From python-checkins at python.org Fri Jan 20 19:29:00 2006 From: python-checkins at python.org (vinay.sajip) Date: Fri, 20 Jan 2006 19:29:00 +0100 (CET) Subject: [Python-checkins] r42118 - python/trunk/Lib/test/output/test_logging Message-ID: <20060120182900.4F60F1E4002@bag.python.org> Author: vinay.sajip Date: Fri Jan 20 19:28:59 2006 New Revision: 42118 Modified: python/trunk/Lib/test/output/test_logging Log: Added a test for the ability to specify a class attribute in Formatter configuration. Contributed by Shane Hathaway. 
Modified: python/trunk/Lib/test/output/test_logging ============================================================================== --- python/trunk/Lib/test/output/test_logging (original) +++ python/trunk/Lib/test/output/test_logging Fri Jan 20 19:28:59 2006 @@ -491,6 +491,10 @@ config2: exceptions.AttributeError config3: exceptions.KeyError -- log_test4 end --------------------------------------------------- +-- log_test5 begin --------------------------------------------------- +ERROR:root:just testing +exceptions.KeyError... Don't panic! +-- log_test5 end --------------------------------------------------- -- logrecv output begin --------------------------------------------------- ERR -> CRITICAL: Message 0 (via logrecv.tcp.ERR) ERR -> ERROR: Message 1 (via logrecv.tcp.ERR) From python-checkins at python.org Fri Jan 20 19:29:37 2006 From: python-checkins at python.org (vinay.sajip) Date: Fri, 20 Jan 2006 19:29:37 +0100 (CET) Subject: [Python-checkins] r42119 - python/trunk/Lib/test/test_logging.py Message-ID: <20060120182937.252511E4002@bag.python.org> Author: vinay.sajip Date: Fri Jan 20 19:29:36 2006 New Revision: 42119 Modified: python/trunk/Lib/test/test_logging.py Log: Added a test for the ability to specify a class attribute in Formatter configuration. Contributed by Shane Hathaway. Modified: python/trunk/Lib/test/test_logging.py ============================================================================== --- python/trunk/Lib/test/test_logging.py (original) +++ python/trunk/Lib/test/test_logging.py Fri Jan 20 19:29:36 2006 @@ -396,7 +396,7 @@ # Test 4 #---------------------------------------------------------------------------- -# config0 is a standard configuratin. +# config0 is a standard configuration. 
config0 = """ [loggers] keys=root @@ -489,6 +489,65 @@ loggerDict.update(saved_loggers) #---------------------------------------------------------------------------- +# Test 5 +#---------------------------------------------------------------------------- + +test5_config = """ +[loggers] +keys=root + +[handlers] +keys=hand1 + +[formatters] +keys=form1 + +[logger_root] +level=NOTSET +handlers=hand1 + +[handler_hand1] +class=StreamHandler +level=NOTSET +formatter=form1 +args=(sys.stdout,) + +[formatter_form1] +class=test.test_logging.FriendlyFormatter +format=%(levelname)s:%(name)s:%(message)s +datefmt= +""" + +class FriendlyFormatter (logging.Formatter): + def formatException(self, ei): + return "%s... Don't panic!" % str(ei[0]) + + +def test5(): + loggerDict = logging.getLogger().manager.loggerDict + saved_handlers = logging._handlers.copy() + saved_loggers = loggerDict.copy() + try: + fn = tempfile.mktemp(".ini") + f = open(fn, "w") + f.write(test5_config) + f.close() + logging.config.fileConfig(fn) + try: + raise KeyError + except KeyError: + logging.exception("just testing") + os.remove(fn) + finally: + logging._handlers.clear() + logging._handlers.update(saved_handlers) + loggerDict = logging.getLogger().manager.loggerDict + loggerDict.clear() + loggerDict.update(saved_loggers) + + + +#---------------------------------------------------------------------------- # Test Harness #---------------------------------------------------------------------------- def banner(nm, typ): @@ -540,21 +599,10 @@ banner("log_test0", "end") - banner("log_test1", "begin") - test1() - banner("log_test1", "end") - - banner("log_test2", "begin") - test2() - banner("log_test2", "end") - - banner("log_test3", "begin") - test3() - banner("log_test3", "end") - - banner("log_test4", "begin") - test4() - banner("log_test4", "end") + for t in range(1,6): + banner("log_test%d" % t, "begin") + globals()['test%d' % t]() + banner("log_test%d" % t, "end") finally: #wait for TCP receiver to 
terminate From fdrake at acm.org Fri Jan 20 20:02:12 2006 From: fdrake at acm.org (Fred L. Drake, Jr.) Date: Fri, 20 Jan 2006 14:02:12 -0500 Subject: [Python-checkins] r42109 - in python/branches/release24-maint: Lib/urlparse.py Misc/NEWS In-Reply-To: References: <20060120172437.C25801E405E@bag.python.org> Message-ID: <200601201402.13121.fdrake@acm.org> On Friday 20 January 2006 13:27, Guido van Rossum wrote: > This sounds awfully close to a feature. It's a slippery slope. Interesting. I didn't hear any complaints when I added svn: and svn+ssh: support to the urlparse module; Python 2.4 was already in maintenance at the time. -Fred -- Fred L. Drake, Jr. From guido at python.org Fri Jan 20 20:21:52 2006 From: guido at python.org (Guido van Rossum) Date: Fri, 20 Jan 2006 11:21:52 -0800 Subject: [Python-checkins] r42109 - in python/branches/release24-maint: Lib/urlparse.py Misc/NEWS In-Reply-To: <200601201402.13121.fdrake@acm.org> References: <20060120172437.C25801E405E@bag.python.org> <200601201402.13121.fdrake@acm.org> Message-ID: On 1/20/06, Fred L. Drake, Jr. wrote: > On Friday 20 January 2006 13:27, Guido van Rossum wrote: > > This sounds awfully close to a feature. It's a slippery slope. > > Interesting. I didn't hear any complaints when I added svn: and svn+ssh: > support to the urlparse module; Python 2.4 was already in maintenance at the > time. I didn't happen upon the checkin perhaps. I'm not sure georg's checkin is wrong, but I think we ought to have a (brief) discussion about the definition of a feature vs. a bug. Most bugs fixed in bugfix releases are much closer to being clear bugs. 
-- --Guido van Rossum (home page: http://www.python.org/~guido/) From python-checkins at python.org Fri Jan 20 21:03:25 2006 From: python-checkins at python.org (tim.peters) Date: Fri, 20 Jan 2006 21:03:25 +0100 (CET) Subject: [Python-checkins] r42120 - python/trunk/Lib/locale.py Message-ID: <20060120200325.532911E4002@bag.python.org> Author: tim.peters Date: Fri Jan 20 21:03:24 2006 New Revision: 42120 Modified: python/trunk/Lib/locale.py Log: Whitespace normalization. Modified: python/trunk/Lib/locale.py ============================================================================== --- python/trunk/Lib/locale.py (original) +++ python/trunk/Lib/locale.py Fri Jan 20 21:03:24 2006 @@ -1212,7 +1212,7 @@ # # This maps Windows language identifiers to locale strings. # -# This list has been updated from +# This list has been updated from # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp # to include every locale up to Windows XP. # From g.brandl-nospam at gmx.net Fri Jan 20 21:12:23 2006 From: g.brandl-nospam at gmx.net (Georg Brandl) Date: Fri, 20 Jan 2006 21:12:23 +0100 Subject: [Python-checkins] r42109 - in python/branches/release24-maint: Lib/urlparse.py Misc/NEWS In-Reply-To: References: <20060120172437.C25801E405E@bag.python.org> <200601201402.13121.fdrake@acm.org> Message-ID: Guido van Rossum wrote: > On 1/20/06, Fred L. Drake, Jr. wrote: >> On Friday 20 January 2006 13:27, Guido van Rossum wrote: >> > This sounds awfully close to a feature. It's a slippery slope. >> >> Interesting. I didn't hear any complaints when I added svn: and svn+ssh: >> support to the urlparse module; Python 2.4 was already in mainenance at the >> time. > > I didn't happen upon the checkin perhaps. > > I'm not sure georg's checkin is wrong, but I think we ought to have a > (brief) discussion about the definition of a feature vs. a bug. Most > bugs fixed in bugfix releases are much closer to being clear bugs. 
One thing is, the urlparse documentation doesn't explicitly mention supported protocols. Georg From guido at python.org Fri Jan 20 21:29:16 2006 From: guido at python.org (Guido van Rossum) Date: Fri, 20 Jan 2006 12:29:16 -0800 Subject: [Python-checkins] r42109 - in python/branches/release24-maint: Lib/urlparse.py Misc/NEWS In-Reply-To: References: <20060120172437.C25801E405E@bag.python.org> <200601201402.13121.fdrake@acm.org> Message-ID: On 1/20/06, Georg Brandl wrote: > Guido van Rossum wrote: > > On 1/20/06, Fred L. Drake, Jr. wrote: > >> On Friday 20 January 2006 13:27, Guido van Rossum wrote: > >> > This sounds awfully close to a feature. It's a slippery slope. > >> > >> Interesting. I didn't hear any complaints when I added svn: and svn+ssh: > >> support to the urlparse module; Python 2.4 was already in maintenance at the > >> time. > > > > I didn't happen upon the checkin perhaps. > > > > I'm not sure georg's checkin is wrong, but I think we ought to have a > > (brief) discussion about the definition of a feature vs. a bug. Most > > bugs fixed in bugfix releases are much closer to being clear bugs. > > One thing is, the urlparse documentation doesn't explicitly mention supported > protocols. That's a doc bug. I'd like to see what Anthony thinks about this particular case. -- --Guido van Rossum (home page: http://www.python.org/~guido/) From barry at python.org Fri Jan 20 21:54:45 2006 From: barry at python.org (Barry Warsaw) Date: Fri, 20 Jan 2006 15:54:45 -0500 Subject: [Python-checkins] r42109 - in python/branches/release24-maint: Lib/urlparse.py Misc/NEWS In-Reply-To: References: <20060120172437.C25801E405E@bag.python.org> <200601201402.13121.fdrake@acm.org> Message-ID: <1137790485.8803.41.camel@geddy.wooz.org> On Fri, 2006-01-20 at 12:29 -0800, Guido van Rossum wrote: > That's a doc bug. > > I'd like to see what Anthony thinks about this particular case. Well, speaking as a former Python RM, it would make me uncomfortable. 
It smells of the Python 2.2.1 True/False fiasco (albeit on a smaller scale). -Barry -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 307 bytes Desc: This is a digitally signed message part Url : http://mail.python.org/pipermail/python-checkins/attachments/20060120/91ff902b/attachment.pgp From python-checkins at python.org Fri Jan 20 22:03:36 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 22:03:36 +0100 (CET) Subject: [Python-checkins] r42121 - in python/trunk: Doc/lib/libwebbrowser.tex Lib/webbrowser.py Message-ID: <20060120210336.2B1091E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 22:03:35 2006 New Revision: 42121 Modified: python/trunk/Doc/lib/libwebbrowser.tex python/trunk/Lib/webbrowser.py Log: Try to resolve the remaining webbrowser issues (backgrounding, local urls) Modified: python/trunk/Doc/lib/libwebbrowser.tex ============================================================================== --- python/trunk/Doc/lib/libwebbrowser.tex (original) +++ python/trunk/Doc/lib/libwebbrowser.tex Fri Jan 20 22:03:35 2006 @@ -19,9 +19,9 @@ If the environment variable \envvar{BROWSER} exists, it is interpreted to override the platform default list of browsers, as a os.pathsep-separated list of browsers to try in order. When the value of -a list part contains the string \code{\%s}, then it is interpreted as -a literal browser command line to be used with the argument URL -substituted for the \code{\%s}; if the part does not contain +a list part contains the string \code{\%s}, then it is +interpreted as a literal browser command line to be used with the argument URL +substituted for \code{\%s}; if the part does not contain \code{\%s}, it is simply interpreted as the name of the browser to launch. @@ -52,7 +52,7 @@ a new browser page ("tab") is opened if possible. 
If \var{autoraise} is true, the window is raised if possible (note that under many window managers this will occur regardless of the setting of this variable). - +\versionchanged[\var{new} can now be 2]{2.5} \end{funcdesc} \begin{funcdesc}{open_new}{url} @@ -96,16 +96,17 @@ \lineiii{'netscape'}{\class{Mozilla('netscape')}}{} \lineiii{'galeon'}{\class{Galeon('galeon')}}{} \lineiii{'epiphany'}{\class{Galeon('epiphany')}}{} - \lineiii{'skipstone'}{\class{GenericBrowser('skipstone \%s \&')}}{} + \lineiii{'skipstone'}{\class{BackgroundBrowser('skipstone')}}{} + \lineiii{'kfmclient'}{\class{Konqueror()}}{(1)} \lineiii{'konqueror'}{\class{Konqueror()}}{(1)} \lineiii{'kfm'}{\class{Konqueror()}}{(1)} - \lineiii{'mosaic'}{\class{GenericBrowser('mosaic \%s \&')}}{} + \lineiii{'mosaic'}{\class{BackgroundBrowser('mosaic')}}{} \lineiii{'opera'}{\class{Opera()}}{} \lineiii{'grail'}{\class{Grail()}}{} - \lineiii{'links'}{\class{GenericBrowser('links \%s')}}{} + \lineiii{'links'}{\class{GenericBrowser('links')}}{} \lineiii{'elinks'}{\class{Elinks('elinks')}}{} - \lineiii{'lynx'}{\class{GenericBrowser('lynx \%s')}}{} - \lineiii{'w3m'}{\class{GenericBrowser('w3m \%s')}}{} + \lineiii{'lynx'}{\class{GenericBrowser('lynx')}}{} + \lineiii{'w3m'}{\class{GenericBrowser('w3m')}}{} \lineiii{'windows-default'}{\class{WindowsDefault}}{(2)} \lineiii{'internet-config'}{\class{InternetConfig}}{(3)} \lineiii{'macosx'}{\class{MacOSX('default')}}{(4)} Modified: python/trunk/Lib/webbrowser.py ============================================================================== --- python/trunk/Lib/webbrowser.py (original) +++ python/trunk/Lib/webbrowser.py Fri Jan 20 22:03:35 2006 @@ -4,6 +4,8 @@ import os import sys import stat +import subprocess +import time __all__ = ["Error", "open", "open_new", "open_new_tab", "get", "register"] @@ -29,8 +31,8 @@ alternatives = _tryorder for browser in alternatives: if '%s' in browser: - # User gave us a command line, don't mess with it. 
- return GenericBrowser(browser) + # User gave us a command line, split it into name and args + return GenericBrowser(browser.split()) else: # User gave us a browser name or path. try: @@ -129,8 +131,10 @@ # General parent classes class BaseBrowser(object): - """Parent class for all browsers.""" - + """Parent class for all browsers. Do not use directly.""" + + args = ['%s'] + def __init__(self, name=""): self.name = name self.basename = name @@ -149,46 +153,98 @@ """Class for all browsers started with a command and without remote functionality.""" - def __init__(self, cmd): - self.name, self.args = cmd.split(None, 1) + def __init__(self, name): + if isinstance(name, basestring): + self.name = name + else: + # name should be a list with arguments + self.name = name[0] + self.args = name[1:] self.basename = os.path.basename(self.name) def open(self, url, new=0, autoraise=1): - assert "'" not in url - command = "%s %s" % (self.name, self.args) - rc = os.system(command % url) - return not rc + cmdline = [self.name] + [arg.replace("%s", url) + for arg in self.args] + try: + p = subprocess.Popen(cmdline, close_fds=True) + return not p.wait() + except OSError: + return False + + +class BackgroundBrowser(GenericBrowser): + """Class for all browsers which are to be started in the + background.""" + + def open(self, url, new=0, autoraise=1): + cmdline = [self.name] + [arg.replace("%s", url) + for arg in self.args] + setsid = getattr(os, 'setsid', None) + if not setsid: + setsid = getattr(os, 'setpgrp', None) + try: + p = subprocess.Popen(cmdline, close_fds=True, preexec_fn=setsid) + return (p.poll() is None) + except OSError: + return False class UnixBrowser(BaseBrowser): """Parent class for all Unix browsers with remote functionality.""" raise_opts = None - - remote_cmd = '' + remote_args = ['%action', '%s'] remote_action = None remote_action_newwin = None remote_action_newtab = None - remote_background = False + background = False + redirect_stdout = True - def 
_remote(self, url, action, autoraise): - autoraise = int(bool(autoraise)) # always 0/1 - raise_opt = self.raise_opts and self.raise_opts[autoraise] or '' - cmd = "%s %s %s '%s' >/dev/null 2>&1" % (self.name, raise_opt, - self.remote_cmd, action) - if self.remote_background: - cmd += ' &' - rc = os.system(cmd) - if rc: - cmd = "%s %s" % (self.name, url) - if self.remote_background: - cmd += " &" - # bad return status, try again with simpler command - rc = os.system(cmd) - return not rc + def _invoke(self, args, remote, autoraise): + raise_opt = [] + if remote and self.raise_opts: + # use autoraise argument only for remote invocation + autoraise = int(bool(autoraise)) + opt = self.raise_opts[autoraise] + if opt: raise_opt = [opt] + + cmdline = [self.name] + raise_opt + args + + if remote or self.background: + inout = file(os.devnull, "r+") + else: + # for TTY browsers, we need stdin/out + inout = None + # if possible, put browser in separate process group, so + # keyboard interrupts don't affect browser as well as Python + setsid = getattr(os, 'setsid', None) + if not setsid: + setsid = getattr(os, 'setpgrp', None) + + p = subprocess.Popen(cmdline, close_fds=True, stdin=inout, + stdout=(self.redirect_stdout and inout or None), + stderr=inout, preexec_fn=setsid) + if remote: + # wait five secons. If the subprocess is not finished, the + # remote invocation has (hopefully) started a new instance. 
+ time.sleep(1) + rc = p.poll() + if rc is None: + time.sleep(4) + rc = p.poll() + if rc is None: + return True + # if remote call failed, open() will try direct invocation + return not rc + elif self.background: + if p.poll() is None: + return True + else: + return False + else: + return not p.wait() def open(self, url, new=0, autoraise=1): - assert "'" not in url if new == 0: action = self.remote_action elif new == 1: @@ -199,20 +255,31 @@ else: action = self.remote_action_newtab else: - raise Error("Bad 'new' parameter to open(); expected 0, 1, or 2, got %s" % new) - return self._remote(url, action % url, autoraise) + raise Error("Bad 'new' parameter to open(); " + + "expected 0, 1, or 2, got %s" % new) + + args = [arg.replace("%s", url).replace("%action", action) + for arg in self.remote_args] + success = self._invoke(args, True, autoraise) + if not success: + # remote invocation failed, try straight way + args = [arg.replace("%s", url) for arg in self.args] + return self._invoke(args, False, False) + else: + return True class Mozilla(UnixBrowser): """Launcher class for Mozilla/Netscape browsers.""" - raise_opts = ("-noraise", "-raise") + raise_opts = ["-noraise", "-raise"] - remote_cmd = '-remote' - remote_action = "openURL(%s)" - remote_action_newwin = "openURL(%s,new-window)" - remote_action_newtab = "openURL(%s,new-tab)" - remote_background = True + remote_args = ['-remote', 'openURL(%s%action)'] + remote_action = "" + remote_action_newwin = ",new-window" + remote_action_newtab = ",new-tab" + + background = True Netscape = Mozilla @@ -220,80 +287,101 @@ class Galeon(UnixBrowser): """Launcher class for Galeon/Epiphany browsers.""" - raise_opts = ("-noraise", "") - remote_action = "-n '%s'" - remote_action_newwin = "-w '%s'" + raise_opts = ["-noraise", ""] + remote_args = ['%action', '%s'] + remote_action = "-n" + remote_action_newwin = "-w" - remote_background = True + background = True -class Konqueror(BaseBrowser): - """Controller for the KDE File Manager 
(kfm, or Konqueror). - - See http://developer.kde.org/documentation/other/kfmclient.html - for more information on the Konqueror remote-control interface. +class Opera(UnixBrowser): + "Launcher class for Opera browser." - """ + raise_opts = ["", "-raise"] - def _remote(self, url, action): - # kfmclient is the new KDE way of opening URLs. - cmd = "kfmclient %s >/dev/null 2>&1" % action - rc = os.system(cmd) - # Fall back to other variants. - if rc: - if _iscommand("konqueror"): - rc = os.system(self.name + " --silent '%s' &" % url) - elif _iscommand("kfm"): - rc = os.system(self.name + " -d '%s' &" % url) - return not rc + remote_args = ['-remote', 'openURL(%s%action)'] + remote_action = "" + remote_action_newwin = ",new-window" + remote_action_newtab = ",new-page" + background = True - def open(self, url, new=0, autoraise=1): - # XXX Currently I know no way to prevent KFM from - # opening a new win. - assert "'" not in url - if new == 2: - action = "newTab '%s'" % url - else: - action = "openURL '%s'" % url - ok = self._remote(url, action) - return ok +class Elinks(UnixBrowser): + "Launcher class for Elinks browsers." -class Opera(UnixBrowser): - "Launcher class for Opera browser." + remote_args = ['-remote', 'openURL(%s%action)'] + remote_action = "" + remote_action_newwin = ",new-window" + remote_action_newtab = ",new-tab" + background = False + + # elinks doesn't like its stdout to be redirected - + # it uses redirected stdout as a signal to do -dump + redirect_stdout = False - raise_opts = ("", "-raise") - remote_cmd = '-remote' - remote_action = "openURL(%s)" - remote_action_newwin = "openURL(%s,new-window)" - remote_action_newtab = "openURL(%s,new-page)" - remote_background = True +class Konqueror(BaseBrowser): + """Controller for the KDE File Manager (kfm, or Konqueror). + See the output of ``kfmclient --commands`` + for more information on the Konqueror remote-control interface. + """ -class Elinks(UnixBrowser): - "Launcher class for Elinks browsers." 
+ def open(self, url, new=0, autoraise=1): + # XXX Currently I know no way to prevent KFM from opening a new win. + if new == 2: + action = "newTab" + else: + action = "openURL" + + devnull = file(os.devnull, "r+") + # if possible, put browser in separate process group, so + # keyboard interrupts don't affect browser as well as Python + setsid = getattr(os, 'setsid', None) + if not setsid: + setsid = getattr(os, 'setpgrp', None) + + try: + p = subprocess.Popen(["kfmclient", action, url], + close_fds=True, stdin=devnull, + stdout=devnull, stderr=devnull) + except OSError: + # fall through to next variant + pass + else: + p.wait() + # kfmclient's return code unfortunately has no meaning as it seems + return True - remote_cmd = '-remote' - remote_action = "openURL(%s)" - remote_action_newwin = "openURL(%s,new-window)" - remote_action_newtab = "openURL(%s,new-tab)" - - def _remote(self, url, action, autoraise): - # elinks doesn't like its stdout to be redirected - - # it uses redirected stdout as a signal to do -dump - cmd = "%s %s '%s' 2>/dev/null" % (self.name, - self.remote_cmd, action) - rc = os.system(cmd) - if rc: - rc = os.system("%s %s" % (self.name, url)) - return not rc + try: + p = subprocess.Popen(["konqueror", "--silent", url], + close_fds=True, stdin=devnull, + stdout=devnull, stderr=devnull, + preexec_fn=setsid) + except OSError: + # fall through to next variant + pass + else: + if p.poll() is None: + # Should be running now. + return True + + try: + p = subprocess.Popen(["kfm", "-d", url], + close_fds=True, stdin=devnull, + stdout=devnull, stderr=devnull, + preexec_fn=setsid) + except OSError: + return False + else: + return (p.poll() is None) class Grail(BaseBrowser): # There should be a way to maintain a connection to Grail, but the # Grail remote control protocol doesn't really allow that at this - # point. It probably neverwill! + # point. It probably never will! 
def _find_grail_rc(self): import glob import pwd @@ -354,10 +442,9 @@ # if successful, register it if retncode == None and len(commd) != 0: - register("gnome", None, GenericBrowser( - commd + " '%s' >/dev/null &")) + register("gnome", None, BackgroundBrowser(commd)) - # First, the Mozilla/Netscape browsers + # First, the Mozilla/Netscape browsers for browser in ("mozilla-firefox", "firefox", "mozilla-firebird", "firebird", "mozilla", "netscape"): @@ -377,7 +464,7 @@ # Skipstone, another Gtk/Mozilla based browser if _iscommand("skipstone"): - register("skipstone", None, GenericBrowser("skipstone '%s' &")) + register("skipstone", None, BackgroundBrowser("skipstone")) # Opera, quite popular if _iscommand("opera"): @@ -385,7 +472,7 @@ # Next, Mosaic -- old but still in use. if _iscommand("mosaic"): - register("mosaic", None, GenericBrowser("mosaic '%s' &")) + register("mosaic", None, BackgroundBrowser("mosaic")) # Grail, the Python browser. Does anybody still use it? if _iscommand("grail"): @@ -399,15 +486,15 @@ if os.environ.get("TERM"): # The Links/elinks browsers if _iscommand("links"): - register("links", None, GenericBrowser("links '%s'")) + register("links", None, GenericBrowser("links")) if _iscommand("elinks"): register("elinks", None, Elinks("elinks")) # The Lynx browser , if _iscommand("lynx"): - register("lynx", None, GenericBrowser("lynx '%s'")) + register("lynx", None, GenericBrowser("lynx")) # The w3m browser if _iscommand("w3m"): - register("w3m", None, GenericBrowser("w3m '%s'")) + register("w3m", None, GenericBrowser("w3m")) # # Platform support for Windows @@ -424,7 +511,7 @@ # Prefer mozilla/netscape/opera if present for browser in ("firefox", "firebird", "mozilla", "netscape", "opera"): if _iscommand(browser): - register(browser, None, GenericBrowser(browser + ' %s')) + register(browser, None, BackgroundBrowser(browser)) register("windows-default", WindowsDefault) # @@ -460,6 +547,10 @@ def open(self, url, new=0, autoraise=1): assert "'" not in url 
+ # hack for local urls + if not ':' in url: + url = 'file:'+url + # new must be 0 or 1 new = int(bool(new)) if self.name == "default": @@ -500,7 +591,7 @@ _tryorder = [] _browsers = {} register("os2netscape", None, - GenericBrowser("start netscape %s"), -1) + GenericBrowser(["start", "netscape", "%s"]), -1) # OK, now that we know what the default preference orders for each @@ -543,5 +634,7 @@ url = args[0] open(url, new_win) + print "\a" + if __name__ == "__main__": main() From python-checkins at python.org Fri Jan 20 22:08:57 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 22:08:57 +0100 (CET) Subject: [Python-checkins] r42122 - python/branches/release24-maint/Lib/unittest.py Message-ID: <20060120210857.591C81E402E@bag.python.org> Author: georg.brandl Date: Fri Jan 20 22:08:54 2006 New Revision: 42122 Modified: python/branches/release24-maint/Lib/unittest.py Log: Reverse checkin. This is definitely a new feature. Modified: python/branches/release24-maint/Lib/unittest.py ============================================================================== --- python/branches/release24-maint/Lib/unittest.py (original) +++ python/branches/release24-maint/Lib/unittest.py Fri Jan 20 22:08:54 2006 @@ -201,9 +201,9 @@ not have a method with the specified name. """ try: - self._testMethodName = methodName + self.__testMethodName = methodName testMethod = getattr(self, methodName) - self._testMethodDoc = testMethod.__doc__ + self.__testMethodDoc = testMethod.__doc__ except AttributeError: raise ValueError, "no such test method in %s: %s" % \ (self.__class__, methodName) @@ -229,30 +229,30 @@ The default implementation of this method returns the first line of the specified test method's docstring. 
""" - doc = self._testMethodDoc + doc = self.__testMethodDoc return doc and doc.split("\n")[0].strip() or None def id(self): - return "%s.%s" % (_strclass(self.__class__), self._testMethodName) + return "%s.%s" % (_strclass(self.__class__), self.__testMethodName) def __str__(self): - return "%s (%s)" % (self._testMethodName, _strclass(self.__class__)) + return "%s (%s)" % (self.__testMethodName, _strclass(self.__class__)) def __repr__(self): return "<%s testMethod=%s>" % \ - (_strclass(self.__class__), self._testMethodName) + (_strclass(self.__class__), self.__testMethodName) def run(self, result=None): if result is None: result = self.defaultTestResult() result.startTest(self) - testMethod = getattr(self, self._testMethodName) + testMethod = getattr(self, self.__testMethodName) try: try: self.setUp() except KeyboardInterrupt: raise except: - result.addError(self, self._exc_info()) + result.addError(self, self.__exc_info()) return ok = False @@ -260,18 +260,18 @@ testMethod() ok = True except self.failureException: - result.addFailure(self, self._exc_info()) + result.addFailure(self, self.__exc_info()) except KeyboardInterrupt: raise except: - result.addError(self, self._exc_info()) + result.addError(self, self.__exc_info()) try: self.tearDown() except KeyboardInterrupt: raise except: - result.addError(self, self._exc_info()) + result.addError(self, self.__exc_info()) ok = False if ok: result.addSuccess(self) finally: @@ -283,10 +283,10 @@ def debug(self): """Run the test without collecting errors in a TestResult""" self.setUp() - getattr(self, self._testMethodName)() + getattr(self, self.__testMethodName)() self.tearDown() - def _exc_info(self): + def __exc_info(self): """Return a version of sys.exc_info() with the traceback frame minimised; usually the top level of the traceback frame is not needed. 
From python-checkins at python.org Fri Jan 20 22:09:56 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 22:09:56 +0100 (CET) Subject: [Python-checkins] r42123 - python/branches/release24-maint/Lib/urlparse.py Message-ID: <20060120210956.3371A1E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 22:09:55 2006 New Revision: 42123 Modified: python/branches/release24-maint/Lib/urlparse.py Log: Revert feature checkin. Modified: python/branches/release24-maint/Lib/urlparse.py ============================================================================== --- python/branches/release24-maint/Lib/urlparse.py (original) +++ python/branches/release24-maint/Lib/urlparse.py Fri Jan 20 22:09:55 2006 @@ -9,22 +9,22 @@ # A classification of schemes ('' means apply by default) uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', - 'wais', 'file', 'https', 'shttp', 'mms', - 'prospero', 'rtsp', 'rtspu', '', 'sftp'] + 'wais', 'file', 'https', 'shttp', 'mms', + 'prospero', 'rtsp', 'rtspu', ''] uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', - 'imap', 'wais', 'file', 'mms', 'https', 'shttp', - 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', - 'svn', 'svn+ssh', 'sftp'] + 'imap', 'wais', 'file', 'mms', 'https', 'shttp', + 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', + 'svn', 'svn+ssh'] non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', - 'telnet', 'wais', 'imap', 'snews', 'sip'] + 'telnet', 'wais', 'imap', 'snews', 'sip'] uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', - 'https', 'shttp', 'rtsp', 'rtspu', 'sip', - 'mms', '', 'sftp'] + 'https', 'shttp', 'rtsp', 'rtspu', 'sip', + 'mms', ''] uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', - 'gopher', 'rtsp', 'rtspu', 'sip', ''] + 'gopher', 'rtsp', 'rtspu', 'sip', ''] uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', - 'nntp', 'wais', 'https', 'shttp', 'snews', - 'file', 'prospero', ''] + 'nntp', 'wais', 'https', 'shttp', 'snews', + 'file', 
'prospero', ''] # Characters valid in scheme names scheme_chars = ('abcdefghijklmnopqrstuvwxyz' From python-checkins at python.org Fri Jan 20 22:16:56 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 22:16:56 +0100 (CET) Subject: [Python-checkins] r42124 - python/branches/release24-maint/Doc/lib/liburlparse.tex Message-ID: <20060120211656.853A31E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 22:16:54 2006 New Revision: 42124 Modified: python/branches/release24-maint/Doc/lib/liburlparse.tex Log: Document supported schemes. Modified: python/branches/release24-maint/Doc/lib/liburlparse.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/liburlparse.tex (original) +++ python/branches/release24-maint/Doc/lib/liburlparse.tex Fri Jan 20 22:16:54 2006 @@ -19,9 +19,14 @@ The module has been designed to match the Internet RFC on Relative Uniform Resource Locators (and discovered a bug in an earlier -draft!). +draft!). It supports the following URL schemes: +\code{file}, \code{ftp}, \code{gopher}, \code{hdl}, \code{http}, +\code{https}, \code{imap}, \code{mailto}, \code{mms}, \code{news}, +\code{nntp}, \code{prospero}, \code{rsync}, \code{rtsp}, \code{rtspu}, +\code{shttp}, \code{sip}, \code{snews}, \code{svn}, \code{svn+ssh}, +\code{telnet}, \code{wais}. 
-It defines the following functions: +The \module{urlparse} module defines the following functions: \begin{funcdesc}{urlparse}{urlstring\optional{, default_scheme\optional{, allow_fragments}}} Parse a URL into 6 components, returning a 6-tuple: (addressing From python-checkins at python.org Fri Jan 20 22:17:02 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 22:17:02 +0100 (CET) Subject: [Python-checkins] r42125 - python/trunk/Doc/lib/liburlparse.tex Message-ID: <20060120211702.EA4C51E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 22:17:01 2006 New Revision: 42125 Modified: python/trunk/Doc/lib/liburlparse.tex Log: Document supported schemes. Modified: python/trunk/Doc/lib/liburlparse.tex ============================================================================== --- python/trunk/Doc/lib/liburlparse.tex (original) +++ python/trunk/Doc/lib/liburlparse.tex Fri Jan 20 22:17:01 2006 @@ -19,9 +19,14 @@ The module has been designed to match the Internet RFC on Relative Uniform Resource Locators (and discovered a bug in an earlier -draft!). +draft!). It supports the following URL schemes: +\code{file}, \code{ftp}, \code{gopher}, \code{hdl}, \code{http}, +\code{https}, \code{imap}, \code{mailto}, \code{mms}, \code{news}, +\code{nntp}, \code{prospero}, \code{rsync}, \code{rtsp}, \code{rtspu}, +\code{sftp}, \code{shttp}, \code{sip}, \code{snews}, \code{svn}, +\code{svn+ssh}, \code{telnet}, \code{wais}. 
-It defines the following functions: +The \module{urlparse} module defines the following functions: \begin{funcdesc}{urlparse}{urlstring\optional{, default_scheme\optional{, allow_fragments}}} Parse a URL into 6 components, returning a 6-tuple: (addressing From python-checkins at python.org Fri Jan 20 22:19:57 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 22:19:57 +0100 (CET) Subject: [Python-checkins] r42126 - python/branches/release24-maint/Misc/NEWS Message-ID: <20060120211957.698D61E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 22:19:52 2006 New Revision: 42126 Modified: python/branches/release24-maint/Misc/NEWS Log: Remove NEWS entry of reverted checkin. Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Fri Jan 20 22:19:52 2006 @@ -68,8 +68,6 @@ Library ------- -- Bug #1407902: Added support for sftp:// URIs to urlparse. - - Bug #1371247: Update Windows locale identifiers in locale.py. - Bug #1394565: SimpleHTTPServer now doesn't choke on query parameters From python-checkins at python.org Fri Jan 20 22:33:57 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 22:33:57 +0100 (CET) Subject: [Python-checkins] r42127 - python/trunk/Doc/lib/libfuncs.tex Message-ID: <20060120213357.2443B1E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 22:33:54 2006 New Revision: 42127 Modified: python/trunk/Doc/lib/libfuncs.tex Log: Patch #1410783: add documentation links for static/classmethod Modified: python/trunk/Doc/lib/libfuncs.tex ============================================================================== --- python/trunk/Doc/lib/libfuncs.tex (original) +++ python/trunk/Doc/lib/libfuncs.tex Fri Jan 20 22:33:54 2006 @@ -151,6 +151,10 @@ Class methods are different than \Cpp{} or Java static methods. 
If you want those, see \function{staticmethod()} in this section. + + For more information on class methods, consult the documentation on the + standard type hierarchy in chapter 3 of the + \citetitle[../ref/types.html]{Python Reference Manual} (at the bottom). \versionadded{2.2} \versionchanged[Function decorator syntax added]{2.4} \end{funcdesc} @@ -987,7 +991,7 @@ The \code{@staticmethod} form is a function decorator -- see the description of function definitions in chapter 7 of the - \citetitle[../ref/ref.html]{Python Reference Manual} for details. + \citetitle[../ref/function.html]{Python Reference Manual} for details. It can be called either on the class (such as \code{C.f()}) or on an instance (such as \code{C().f()}). The instance is ignored except @@ -996,6 +1000,10 @@ Static methods in Python are similar to those found in Java or \Cpp. For a more advanced concept, see \function{classmethod()} in this section. + + For more information on static methods, consult the documentation on the + standard type hierarchy in chapter 3 of the + \citetitle[../ref/types.html]{Python Reference Manual} (at the bottom). 
\versionadded{2.2} \versionchanged[Function decorator syntax added]{2.4} \end{funcdesc} From python-checkins at python.org Fri Jan 20 22:34:02 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 22:34:02 +0100 (CET) Subject: [Python-checkins] r42128 - python/branches/release24-maint/Doc/lib/libfuncs.tex Message-ID: <20060120213402.99B9C1E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 22:34:01 2006 New Revision: 42128 Modified: python/branches/release24-maint/Doc/lib/libfuncs.tex Log: Patch #1410783: add documentation links for static/classmethod Modified: python/branches/release24-maint/Doc/lib/libfuncs.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libfuncs.tex (original) +++ python/branches/release24-maint/Doc/lib/libfuncs.tex Fri Jan 20 22:34:01 2006 @@ -125,6 +125,10 @@ Class methods are different than \Cpp{} or Java static methods. If you want those, see \function{staticmethod()} in this section. + + For more information on class methods, consult the documentation on the + standard type hierarchy in chapter 3 of the + \citetitle[../ref/types.html]{Python Reference Manual} (at the bottom). \versionadded{2.2} \versionchanged[Function decorator syntax added]{2.4} \end{funcdesc} @@ -949,7 +953,7 @@ The \code{@staticmethod} form is a function decorator -- see the description of function definitions in chapter 7 of the - \citetitle[../ref/ref.html]{Python Reference Manual} for details. + \citetitle[../ref/function.html]{Python Reference Manual} for details. It can be called either on the class (such as \code{C.f()}) or on an instance (such as \code{C().f()}). The instance is ignored except @@ -958,6 +962,10 @@ Static methods in Python are similar to those found in Java or \Cpp. For a more advanced concept, see \function{classmethod()} in this section. 
+ + For more information on static methods, consult the documentation on the + standard type hierarchy in chapter 3 of the + \citetitle[../ref/types.html]{Python Reference Manual} (at the bottom). \versionadded{2.2} \versionchanged[Function decorator syntax added]{2.4} \end{funcdesc} From python-checkins at python.org Fri Jan 20 22:36:05 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 22:36:05 +0100 (CET) Subject: [Python-checkins] r42129 - python/trunk/Lib/os.py Message-ID: <20060120213605.5EB4A1E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 22:36:02 2006 New Revision: 42129 Modified: python/trunk/Lib/os.py Log: Patch #1410998: remove "DOS" from os.py docstring Modified: python/trunk/Lib/os.py ============================================================================== --- python/trunk/Lib/os.py (original) +++ python/trunk/Lib/os.py Fri Jan 20 22:36:02 2006 @@ -1,4 +1,4 @@ -r"""OS routines for Mac, DOS, NT, or Posix depending on what system we're on. +r"""OS routines for Mac, NT, or Posix depending on what system we're on. This exports: - all functions from posix, nt, os2, mac, or ce, e.g. unlink, stat, etc. From python-checkins at python.org Fri Jan 20 23:00:44 2006 From: python-checkins at python.org (georg.brandl) Date: Fri, 20 Jan 2006 23:00:44 +0100 (CET) Subject: [Python-checkins] r42130 - python/branches/release24-maint/Lib/urllib2.py Message-ID: <20060120220044.4106A1E4002@bag.python.org> Author: georg.brandl Date: Fri Jan 20 23:00:43 2006 New Revision: 42130 Modified: python/branches/release24-maint/Lib/urllib2.py Log: Bug #1378679: backport bugfix of #974757. 
Modified: python/branches/release24-maint/Lib/urllib2.py ============================================================================== --- python/branches/release24-maint/Lib/urllib2.py (original) +++ python/branches/release24-maint/Lib/urllib2.py Fri Jan 20 23:00:43 2006 @@ -720,7 +720,7 @@ return self.retry_http_basic_auth(host, req, realm) def retry_http_basic_auth(self, host, req, realm): - user,pw = self.passwd.find_user_password(realm, host) + user, pw = self.passwd.find_user_password(realm, req.get_full_url()) if pw is not None: raw = "%s:%s" % (user, pw) auth = 'Basic %s' % base64.encodestring(raw).strip() From python-checkins at python.org Sat Jan 21 00:40:56 2006 From: python-checkins at python.org (tim.peters) Date: Sat, 21 Jan 2006 00:40:56 +0100 (CET) Subject: [Python-checkins] r42131 - python/trunk/Lib/webbrowser.py Message-ID: <20060120234056.CF4941E402F@bag.python.org> Author: tim.peters Date: Sat Jan 21 00:40:56 2006 New Revision: 42131 Modified: python/trunk/Lib/webbrowser.py Log: Whitespace normalization. Modified: python/trunk/Lib/webbrowser.py ============================================================================== --- python/trunk/Lib/webbrowser.py (original) +++ python/trunk/Lib/webbrowser.py Sat Jan 21 00:40:56 2006 @@ -132,9 +132,9 @@ class BaseBrowser(object): """Parent class for all browsers. 
Do not use directly.""" - + args = ['%s'] - + def __init__(self, name=""): self.name = name self.basename = name @@ -163,7 +163,7 @@ self.basename = os.path.basename(self.name) def open(self, url, new=0, autoraise=1): - cmdline = [self.name] + [arg.replace("%s", url) + cmdline = [self.name] + [arg.replace("%s", url) for arg in self.args] try: p = subprocess.Popen(cmdline, close_fds=True) @@ -209,7 +209,7 @@ if opt: raise_opt = [opt] cmdline = [self.name] + raise_opt + args - + if remote or self.background: inout = file(os.devnull, "r+") else: @@ -220,7 +220,7 @@ setsid = getattr(os, 'setsid', None) if not setsid: setsid = getattr(os, 'setpgrp', None) - + p = subprocess.Popen(cmdline, close_fds=True, stdin=inout, stdout=(self.redirect_stdout and inout or None), stderr=inout, preexec_fn=setsid) @@ -257,7 +257,7 @@ else: raise Error("Bad 'new' parameter to open(); " + "expected 0, 1, or 2, got %s" % new) - + args = [arg.replace("%s", url).replace("%action", action) for arg in self.remote_args] success = self._invoke(args, True, autoraise) @@ -278,7 +278,7 @@ remote_action = "" remote_action_newwin = ",new-window" remote_action_newtab = ",new-tab" - + background = True Netscape = Mozilla @@ -334,14 +334,14 @@ action = "newTab" else: action = "openURL" - + devnull = file(os.devnull, "r+") # if possible, put browser in separate process group, so # keyboard interrupts don't affect browser as well as Python setsid = getattr(os, 'setsid', None) if not setsid: setsid = getattr(os, 'setpgrp', None) - + try: p = subprocess.Popen(["kfmclient", action, url], close_fds=True, stdin=devnull, @@ -366,7 +366,7 @@ if p.poll() is None: # Should be running now. 
return True - + try: p = subprocess.Popen(["kfm", "-d", url], close_fds=True, stdin=devnull, @@ -550,7 +550,7 @@ # hack for local urls if not ':' in url: url = 'file:'+url - + # new must be 0 or 1 new = int(bool(new)) if self.name == "default": From anthony at interlink.com.au Sat Jan 21 01:41:11 2006 From: anthony at interlink.com.au (Anthony Baxter) Date: Sat, 21 Jan 2006 11:41:11 +1100 Subject: [Python-checkins] r42109 - in python/branches/release24-maint: Lib/urlparse.py Misc/NEWS In-Reply-To: References: <20060120172437.C25801E405E@bag.python.org> Message-ID: <200601211141.13853.anthony@interlink.com.au> On Saturday 21 January 2006 07:29, Guido van Rossum wrote: > [adding new schemas to urlparse] > I'd like to see what Anthony thinks about this particular case. I don't think these should be added to 2.4.x. It violates the "all 2.4.x releases are feature equivalent" assertion we try to make. Suddenly someone needs to specify "You will need 2.4.3 to do this". Since 2.5 is not too far away, I think this should be reverted. -- Anthony Baxter It's never too late to have a happy childhood. From anthony at interlink.com.au Sat Jan 21 01:42:07 2006 From: anthony at interlink.com.au (Anthony Baxter) Date: Sat, 21 Jan 2006 11:42:07 +1100 Subject: [Python-checkins] r42116 - python/branches/release24-maint/Lib/unittest.py In-Reply-To: <20060120175503.4C0C71E4002@bag.python.org> References: <20060120175503.4C0C71E4002@bag.python.org> Message-ID: <200601211142.09545.anthony@interlink.com.au> On Saturday 21 January 2006 04:55, georg.brandl wrote: > Author: georg.brandl > Date: Fri Jan 20 18:55:02 2006 > New Revision: 42116 > > Modified: > python/branches/release24-maint/Lib/unittest.py > Log: > Patch #1388073: Make unittest.TestCase easier to subclass This doesn't belong in release24-maint. Please revert. Anthony -- Anthony Baxter It's never too late to have a happy childhood. From fdrake at acm.org Sat Jan 21 02:00:58 2006 From: fdrake at acm.org (Fred L. Drake, Jr.) 
Date: Fri, 20 Jan 2006 20:00:58 -0500 Subject: [Python-checkins] r42109 - in python/branches/release24-maint: Lib/urlparse.py Misc/NEWS In-Reply-To: <200601211141.13853.anthony@interlink.com.au> References: <20060120172437.C25801E405E@bag.python.org> <200601211141.13853.anthony@interlink.com.au> Message-ID: <200601202000.58666.fdrake@acm.org> On Friday 20 January 2006 19:41, Anthony Baxter wrote: > I don't think these should be added to 2.4.x. It violates the "all > 2.4.x releases are feature equivalent" assertion we try to make. > Suddenly someone needs to specify "You will need 2.4.3 to do this". > Since 2.5 is not too far away, I think this should be reverted. We have to specify things like that anyway just to avoid segfaults sometimes. :-) I've never thought of the list of supported URL schemes for urlparse as a feature definition, but that's not an unreasonable stance. The next question becomes: should I back out my addition of svn: and svn+ssh:? The decision that the list of URL schemes is a feature definition suggests I should, but it was already included in 2.4.2. Would removing it cause more harm than good at this point? I don't remember offhand exactly which release added those, but can look it up if it matters. -Fred -- Fred L. Drake, Jr. From anthony at interlink.com.au Sat Jan 21 02:20:38 2006 From: anthony at interlink.com.au (Anthony Baxter) Date: Sat, 21 Jan 2006 12:20:38 +1100 Subject: [Python-checkins] r42109 - in python/branches/release24-maint: Lib/urlparse.py Misc/NEWS In-Reply-To: <200601202000.58666.fdrake@acm.org> References: <20060120172437.C25801E405E@bag.python.org> <200601211141.13853.anthony@interlink.com.au> <200601202000.58666.fdrake@acm.org> Message-ID: <200601211220.40933.anthony@interlink.com.au> On Saturday 21 January 2006 12:00, Fred L. Drake, Jr. wrote: > The next question becomes: should I back out my addition of svn: > and svn+ssh:? 
The decision that the list of URL schemes is a > feature definition suggests I should, but it was already included > in 2.4.2. Would removing it cause more harm than good at this > point? I don't remember offhand exactly which release added those, > but can look it up if it matters. Not if they've already been in a release. From python-checkins at python.org Sat Jan 21 03:47:55 2006 From: python-checkins at python.org (tim.peters) Date: Sat, 21 Jan 2006 03:47:55 +0100 (CET) Subject: [Python-checkins] r42132 - python/trunk/Modules/_sre.c Message-ID: <20060121024755.BEDE31E4002@bag.python.org> Author: tim.peters Date: Sat Jan 21 03:47:53 2006 New Revision: 42132 Modified: python/trunk/Modules/_sre.c Log: _compile(): raise an exception if downcasting to SRE_CODE loses information: OverflowError: regular expression code size limit exceeded Otherwise the compiled code is gibberish, possibly leading at least to wrong results or (as reported on c.l.py) internal sre errors at match time. I'm not sure how to test this. SRE_CODE is a 2-byte type on my box, and it's easy to create a regexp that causes the new exception to trigger here. But it may be a 4-byte type on other boxes, and creating a regexp large enough to trigger problems there would be pretty crazy. Bugfix candidate. 
Modified: python/trunk/Modules/_sre.c ============================================================================== --- python/trunk/Modules/_sre.c (original) +++ python/trunk/Modules/_sre.c Sat Jan 21 03:47:53 2006 @@ -451,7 +451,7 @@ if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15)))) return ok; set += 16; - } + } else { /* (32 bits per code word) */ if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31)))) @@ -492,7 +492,7 @@ else block = -1; set += 64; - if (block >=0 && + if (block >=0 && (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31)))) return ok; set += count*8; @@ -568,7 +568,7 @@ while (ptr < end && (SRE_CODE) *ptr != chr) ptr++; break; - + case SRE_OP_NOT_LITERAL_IGNORE: /* repeated non-literal */ chr = pattern[1]; @@ -644,7 +644,7 @@ * - Recursive SRE_MATCH() returned false, and will continue the * outside 'for' loop: must be protected when breaking, since the next * OP could potentially depend on lastmark; - * + * * - Recursive SRE_MATCH() returned false, and will be called again * inside a local for/while loop: must be protected between each * loop iteration, since the recursive SRE_MATCH() could do anything, @@ -848,7 +848,7 @@ /* state->lastmark is the highest valid index in the state->mark array. If it is increased by more than 1, the intervening marks must be set to NULL to signal - that these marks have not been encountered. */ + that these marks have not been encountered. 
*/ int j = state->lastmark + 1; while (j < i) state->mark[j++] = NULL; @@ -1065,9 +1065,9 @@ RETURN_ON_ERROR(ret); RETURN_SUCCESS; } - + LASTMARK_RESTORE(); - + ctx->ptr--; ctx->count--; } @@ -1115,7 +1115,7 @@ RETURN_ON_ERROR(ret); DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); if (ret < (int) ctx->pattern[1]) - /* didn't match minimum number of times */ + /* didn't match minimum number of times */ RETURN_FAILURE; /* advance past minimum matches of repeat */ ctx->count = ret; @@ -1546,7 +1546,6 @@ } break; } - } ptr++; } @@ -1601,7 +1600,7 @@ return status; } - + LOCAL(int) SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len) { @@ -1652,10 +1651,14 @@ for (i = 0; i < n; i++) { PyObject *o = PyList_GET_ITEM(code, i); - if (PyInt_Check(o)) - self->code[i] = (SRE_CODE) PyInt_AsLong(o); - else - self->code[i] = (SRE_CODE) PyLong_AsUnsignedLong(o); + unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o) + : PyLong_AsUnsignedLong(o); + self->code[i] = (SRE_CODE) value; + if ((unsigned long) self->code[i] != value) { + PyErr_SetString(PyExc_OverflowError, + "regular expression code size limit exceeded"); + break; + } } if (PyErr_Occurred()) { @@ -1724,7 +1727,7 @@ /* given a python object, return a data pointer, a length (in characters), and a character size. 
return NULL if the object is not a string (or not compatible) */ - + PyBufferProcs *buffer; int size, bytes, charsize; void* ptr; @@ -2203,7 +2206,7 @@ while (state.start <= state.end) { PyObject* item; - + state_reset(&state); state.ptr = state.start; @@ -2222,7 +2225,7 @@ pattern_error(status); goto error; } - + /* don't bother to build a match object */ switch (self->groups) { case 0: @@ -2271,7 +2274,7 @@ Py_DECREF(list); state_fini(&state); return NULL; - + } #if PY_VERSION_HEX >= 0x02020000 @@ -2349,7 +2352,7 @@ pattern_error(status); goto error; } - + if (state.start == state.ptr) { if (last == state.end) break; @@ -2405,7 +2408,7 @@ Py_DECREF(list); state_fini(&state); return NULL; - + } static PyObject* @@ -2496,7 +2499,7 @@ pattern_error(status); goto error; } - + b = STATE_OFFSET(&state, state.start); e = STATE_OFFSET(&state, state.ptr); @@ -2542,7 +2545,7 @@ if (status < 0) goto error; } - + i = e; n = n + 1; @@ -2586,7 +2589,7 @@ state_fini(&state); Py_DECREF(filter); return NULL; - + } static PyObject* @@ -2626,7 +2629,7 @@ if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__")) return NULL; - + copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize); if (!copy) return NULL; @@ -2653,7 +2656,7 @@ { #ifdef USE_BUILTIN_COPY PatternObject* copy; - + PyObject* memo; if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo)) return NULL; @@ -2701,28 +2704,28 @@ PyDoc_STRVAR(pattern_sub_doc, "sub(repl, string[, count = 0]) --> newstring\n\ Return the string obtained by replacing the leftmost non-overlapping\n\ - occurrences of pattern in string by the replacement repl."); + occurrences of pattern in string by the replacement repl."); PyDoc_STRVAR(pattern_subn_doc, "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\ Return the tuple (new_string, number_of_subs_made) found by replacing\n\ the leftmost non-overlapping occurrences of pattern with the\n\ - replacement repl."); + replacement repl."); PyDoc_STRVAR(pattern_doc, 
"Compiled regular expression objects"); static PyMethodDef pattern_methods[] = { - {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS, + {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS, pattern_match_doc}, - {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS, + {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS, pattern_search_doc}, {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS, pattern_sub_doc}, {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS, pattern_subn_doc}, - {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS, + {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS, pattern_split_doc}, - {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS, + {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS, pattern_findall_doc}, #if PY_VERSION_HEX >= 0x02020000 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS, @@ -2734,7 +2737,7 @@ {NULL, NULL} }; -static PyObject* +static PyObject* pattern_getattr(PatternObject* self, char* name) { PyObject* res; @@ -3113,7 +3116,7 @@ #ifdef USE_BUILTIN_COPY MatchObject* copy; int slots, offset; - + if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__")) return NULL; @@ -3146,7 +3149,7 @@ { #ifdef USE_BUILTIN_COPY MatchObject* copy; - + PyObject* memo; if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo)) return NULL; @@ -3181,7 +3184,7 @@ {NULL, NULL} }; -static PyObject* +static PyObject* match_getattr(MatchObject* self, char* name) { PyObject* res; @@ -3337,7 +3340,7 @@ {NULL, NULL} }; -static PyObject* +static PyObject* scanner_getattr(ScannerObject* self, char* name) { PyObject* res; @@ -3374,7 +3377,7 @@ {NULL, NULL} }; -#if PY_VERSION_HEX < 0x02030000 +#if PY_VERSION_HEX < 0x02030000 DL_EXPORT(void) init_sre(void) #else PyMODINIT_FUNC init_sre(void) From python-checkins at python.org Sat Jan 21 08:21:00 2006 From: python-checkins at python.org (georg.brandl) 
Date: Sat, 21 Jan 2006 08:21:00 +0100 (CET) Subject: [Python-checkins] r42133 - in python/trunk: Lib/urllib2.py Misc/NEWS Message-ID: <20060121072100.91B231E4002@bag.python.org> Author: georg.brandl Date: Sat Jan 21 08:20:56 2006 New Revision: 42133 Modified: python/trunk/Lib/urllib2.py python/trunk/Misc/NEWS Log: Bug #902075: urllib2 now handles "host:port" proxy specifications Can/should this be backported? Modified: python/trunk/Lib/urllib2.py ============================================================================== --- python/trunk/Lib/urllib2.py (original) +++ python/trunk/Lib/urllib2.py Sat Jan 21 08:20:56 2006 @@ -579,14 +579,19 @@ def proxy_open(self, req, proxy, type): orig_type = req.get_type() type, r_type = splittype(proxy) - host, XXX = splithost(r_type) - if '@' in host: - user_pass, host = host.split('@', 1) - if ':' in user_pass: - user, password = user_pass.split(':', 1) - user_pass = base64.encodestring('%s:%s' % (unquote(user), - unquote(password))).strip() - req.add_header('Proxy-authorization', 'Basic ' + user_pass) + if not type or r_type.isdigit(): + # proxy is specified without protocol + type = orig_type + host = proxy + else: + host, r_host = splithost(r_type) + user_pass, host = splituser(host) + user, password = splitpasswd(user_pass) + if user and password: + user, password = user_pass.split(':', 1) + user_pass = base64.encodestring('%s:%s' % (unquote(user), + unquote(password))).strip() + req.add_header('Proxy-authorization', 'Basic ' + user_pass) host = unquote(host) req.set_proxy(host, type) if orig_type == type: Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Sat Jan 21 08:20:56 2006 @@ -337,7 +337,9 @@ Library ------- -- Bug #1407902: Added support for sftp:// URIs to urlparse. +- Bug #902075: urllib2 now supports 'host:port' style proxy specifications. 
+ +- Bug #1407902: Add support for sftp:// URIs to urlparse. - Bug #1371247: Update Windows locale identifiers in locale.py. From python-checkins at python.org Sat Jan 21 11:50:39 2006 From: python-checkins at python.org (walter.doerwald) Date: Sat, 21 Jan 2006 11:50:39 +0100 (CET) Subject: [Python-checkins] r42134 - python/trunk/Doc/howto/curses.tex Message-ID: <20060121105039.9EFAD1E4002@bag.python.org> Author: walter.doerwald Date: Sat Jan 21 11:50:39 2006 New Revision: 42134 Modified: python/trunk/Doc/howto/curses.tex Log: Fix typo Modified: python/trunk/Doc/howto/curses.tex ============================================================================== --- python/trunk/Doc/howto/curses.tex (original) +++ python/trunk/Doc/howto/curses.tex Sat Jan 21 11:50:39 2006 @@ -75,7 +75,7 @@ This HOWTO is simply an introduction to writing text-mode programs with curses and Python. It doesn't attempt to be a complete guide to -the curses API; for that, see the Python library guide's serction on +the curses API; for that, see the Python library guide's section on ncurses, and the C manual pages for ncurses. It will, however, give you the basic ideas. From python-checkins at python.org Sat Jan 21 15:14:16 2006 From: python-checkins at python.org (andrew.kuchling) Date: Sat, 21 Jan 2006 15:14:16 +0100 (CET) Subject: [Python-checkins] r42135 - sandbox/trunk/pycon/parse-sched.py Message-ID: <20060121141416.E1B8F1E4002@bag.python.org> Author: andrew.kuchling Date: Sat Jan 21 15:14:16 2006 New Revision: 42135 Modified: sandbox/trunk/pycon/parse-sched.py Log: Change #! 
line; fix bug in determining row span (code would walk off the end of the list) Modified: sandbox/trunk/pycon/parse-sched.py ============================================================================== --- sandbox/trunk/pycon/parse-sched.py (original) +++ sandbox/trunk/pycon/parse-sched.py Sat Jan 21 15:14:16 2006 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # Reads a page with a wiki-format table @@ -93,8 +93,11 @@ if len(time_list) == 0: return 1 span = 0 - while (time_list[span][0] < end_time and span < len(time_list)): + while (span < len(time_list) and + (time_list[span][0] < end_time and span < len(time_list))): span += 1 + if span == len(time_list): + return 1 return span+1 From python-checkins at python.org Sun Jan 22 12:58:40 2006 From: python-checkins at python.org (vinay.sajip) Date: Sun, 22 Jan 2006 12:58:40 +0100 (CET) Subject: [Python-checkins] r42136 - python/trunk/Doc/lib/liblogging.tex Message-ID: <20060122115840.69E001E400D@bag.python.org> Author: vinay.sajip Date: Sun Jan 22 12:58:39 2006 New Revision: 42136 Modified: python/trunk/Doc/lib/liblogging.tex Log: Misc. changes, including documenting the ability to specify a class attribute in Formatter configuration. Contributed by Shane Hathaway. Modified: python/trunk/Doc/lib/liblogging.tex ============================================================================== --- python/trunk/Doc/lib/liblogging.tex (original) +++ python/trunk/Doc/lib/liblogging.tex Sun Jan 22 12:58:39 2006 @@ -867,10 +867,10 @@ \subsubsection{StreamHandler} -The \class{StreamHandler} class sends logging output to streams such as -\var{sys.stdout}, \var{sys.stderr} or any file-like object (or, more -precisely, any object which supports \method{write()} and \method{flush()} -methods). 
+The \class{StreamHandler} class, located in the core \module{logging} +package, sends logging output to streams such as \var{sys.stdout}, +\var{sys.stderr} or any file-like object (or, more precisely, any +object which supports \method{write()} and \method{flush()} methods). \begin{classdesc}{StreamHandler}{\optional{strm}} Returns a new instance of the \class{StreamHandler} class. If \var{strm} is @@ -894,8 +894,9 @@ \subsubsection{FileHandler} -The \class{FileHandler} class sends logging output to a disk file. -It inherits the output functionality from \class{StreamHandler}. +The \class{FileHandler} class, located in the core \module{logging} +package, sends logging output to a disk file. It inherits the output +functionality from \class{StreamHandler}. \begin{classdesc}{FileHandler}{filename\optional{, mode}} Returns a new instance of the \class{FileHandler} class. The specified @@ -914,7 +915,8 @@ \subsubsection{RotatingFileHandler} -The \class{RotatingFileHandler} class supports rotation of disk log files. +The \class{RotatingFileHandler} class, located in the \module{logging.handlers} +module, supports rotation of disk log files. \begin{classdesc}{RotatingFileHandler}{filename\optional{, mode\optional{, maxBytes\optional{, backupCount}}}} @@ -950,7 +952,8 @@ \subsubsection{TimedRotatingFileHandler} -The \class{TimedRotatingFileHandler} class supports rotation of disk log files +The \class{TimedRotatingFileHandler} class, located in the +\module{logging.handlers} module, supports rotation of disk log files at certain timed intervals. \begin{classdesc}{TimedRotatingFileHandler}{filename @@ -996,7 +999,8 @@ \subsubsection{SocketHandler} -The \class{SocketHandler} class sends logging output to a network +The \class{SocketHandler} class, located in the +\module{logging.handlers} module, sends logging output to a network socket. The base class uses a TCP socket. 
\begin{classdesc}{SocketHandler}{host, port} @@ -1044,7 +1048,8 @@ \subsubsection{DatagramHandler} -The \class{DatagramHandler} class inherits from \class{SocketHandler} +The \class{DatagramHandler} class, located in the +\module{logging.handlers} module, inherits from \class{SocketHandler} to support sending logging messages over UDP sockets. \begin{classdesc}{DatagramHandler}{host, port} @@ -1072,8 +1077,9 @@ \subsubsection{SysLogHandler} -The \class{SysLogHandler} class supports sending logging messages to a -remote or local \UNIX{} syslog. +The \class{SysLogHandler} class, located in the +\module{logging.handlers} module, supports sending logging messages to +a remote or local \UNIX{} syslog. \begin{classdesc}{SysLogHandler}{\optional{address\optional{, facility}}} Returns a new instance of the \class{SysLogHandler} class intended to @@ -1101,9 +1107,10 @@ \subsubsection{NTEventLogHandler} -The \class{NTEventLogHandler} class supports sending logging messages -to a local Windows NT, Windows 2000 or Windows XP event log. Before -you can use it, you need Mark Hammond's Win32 extensions for Python +The \class{NTEventLogHandler} class, located in the +\module{logging.handlers} module, supports sending logging messages to +a local Windows NT, Windows 2000 or Windows XP event log. Before you +can use it, you need Mark Hammond's Win32 extensions for Python installed. \begin{classdesc}{NTEventLogHandler}{appname\optional{, @@ -1163,8 +1170,9 @@ \subsubsection{SMTPHandler} -The \class{SMTPHandler} class supports sending logging messages to an email -address via SMTP. +The \class{SMTPHandler} class, located in the +\module{logging.handlers} module, supports sending logging messages to +an email address via SMTP. \begin{classdesc}{SMTPHandler}{mailhost, fromaddr, toaddrs, subject} Returns a new instance of the \class{SMTPHandler} class. 
The @@ -1186,10 +1194,11 @@ \subsubsection{MemoryHandler} -The \class{MemoryHandler} supports buffering of logging records in memory, -periodically flushing them to a \dfn{target} handler. Flushing occurs -whenever the buffer is full, or when an event of a certain severity or -greater is seen. +The \class{MemoryHandler} class, located in the +\module{logging.handlers} module, supports buffering of logging +records in memory, periodically flushing them to a \dfn{target} +handler. Flushing occurs whenever the buffer is full, or when an event +of a certain severity or greater is seen. \class{MemoryHandler} is a subclass of the more general \class{BufferingHandler}, which is an abstract class. This buffers logging @@ -1247,8 +1256,9 @@ \subsubsection{HTTPHandler} -The \class{HTTPHandler} class supports sending logging messages to a -Web server, using either \samp{GET} or \samp{POST} semantics. +The \class{HTTPHandler} class, located in the +\module{logging.handlers} module, supports sending logging messages to +a Web server, using either \samp{GET} or \samp{POST} semantics. \begin{classdesc}{HTTPHandler}{host, url\optional{, method}} Returns a new instance of the \class{HTTPHandler} class. The @@ -1411,12 +1421,12 @@ \subsubsection{Configuration functions% \label{logging-config-api}} -The following functions allow the logging module to be -configured. Before they can be used, you must import -\module{logging.config}. Their use is optional --- you can configure -the logging module entirely by making calls to the main API (defined -in \module{logging} itself) and defining handlers which are declared -either in \module{logging} or \module{logging.handlers}. +The following functions configure the logging module. They are located in the +\module{logging.config} module. 
Their use is optional --- you can configure +the logging module using these functions or by making calls to the +main API (defined in \module{logging} itself) and defining handlers +which are declared either in \module{logging} or +\module{logging.handlers}. \begin{funcdesc}{fileConfig}{fname\optional{, defaults}} Reads the logging configuration from a ConfigParser-format file named @@ -1604,6 +1614,7 @@ [formatter_form01] format=F1 %(asctime)s %(levelname)s %(message)s datefmt= +class=logging.Formatter \end{verbatim} The \code{format} entry is the overall format string, and the @@ -1613,3 +1624,9 @@ The ISO8601 format also specifies milliseconds, which are appended to the result of using the above format string, with a comma separator. An example time in ISO8601 format is \code{2003-01-23 00:29:50,411}. + +The \code{class} entry is optional. It indicates the name of the +formatter's class (as a dotted module and class name.) This option is +useful for instantiating a \class{Formatter} subclass. Subclasses of +\class{Formatter} can present exception tracebacks in an expanded or +condensed format. From python-checkins at python.org Sun Jan 22 14:49:22 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 22 Jan 2006 14:49:22 +0100 (CET) Subject: [Python-checkins] r42137 - python/trunk/Doc/lib/libcgi.tex Message-ID: <20060122134922.24D711E4002@bag.python.org> Author: georg.brandl Date: Sun Jan 22 14:49:21 2006 New Revision: 42137 Modified: python/trunk/Doc/lib/libcgi.tex Log: Remove reference to Grail and Netscape 2.0 from cgi docs. Modified: python/trunk/Doc/lib/libcgi.tex ============================================================================== --- python/trunk/Doc/lib/libcgi.tex (original) +++ python/trunk/Doc/lib/libcgi.tex Sun Jan 22 14:49:21 2006 @@ -36,8 +36,7 @@ to take care of the different cases and provide a simpler interface to the Python script. 
It also provides a number of utilities that help in debugging scripts, and the latest addition is support for file -uploads from a form (if your browser supports it --- Grail 0.3 and -Netscape 2.0 do). +uploads from a form (if your browser supports it). The output of a CGI script should consist of two sections, separated by a blank line. The first section contains a number of headers, From python-checkins at python.org Sun Jan 22 14:49:24 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 22 Jan 2006 14:49:24 +0100 (CET) Subject: [Python-checkins] r42138 - python/branches/release24-maint/Doc/lib/libcgi.tex Message-ID: <20060122134924.E8A421E4009@bag.python.org> Author: georg.brandl Date: Sun Jan 22 14:49:24 2006 New Revision: 42138 Modified: python/branches/release24-maint/Doc/lib/libcgi.tex Log: Remove reference to Grail and Netscape 2.0 from cgi docs. Modified: python/branches/release24-maint/Doc/lib/libcgi.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libcgi.tex (original) +++ python/branches/release24-maint/Doc/lib/libcgi.tex Sun Jan 22 14:49:24 2006 @@ -36,8 +36,7 @@ to take care of the different cases and provide a simpler interface to the Python script. It also provides a number of utilities that help in debugging scripts, and the latest addition is support for file -uploads from a form (if your browser supports it --- Grail 0.3 and -Netscape 2.0 do). +uploads from a form (if your browser supports it). The output of a CGI script should consist of two sections, separated by a blank line. 
The first section contains a number of headers, From python-checkins at python.org Sun Jan 22 14:52:09 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 22 Jan 2006 14:52:09 +0100 (CET) Subject: [Python-checkins] r42139 - python/branches/release24-maint/Doc/lib/libwinreg.tex Message-ID: <20060122135209.1ED081E4002@bag.python.org> Author: georg.brandl Date: Sun Jan 22 14:52:08 2006 New Revision: 42139 Modified: python/branches/release24-maint/Doc/lib/libwinreg.tex Log: Backport winreg documentation quirk fix. Modified: python/branches/release24-maint/Doc/lib/libwinreg.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libwinreg.tex (original) +++ python/branches/release24-maint/Doc/lib/libwinreg.tex Sun Jan 22 14:52:08 2006 @@ -25,9 +25,9 @@ Closes a previously opened registry key. The hkey argument specifies a previously opened key. - Note that if \var{hkey} is not closed using this method, (or the - \method{handle.Close()} closed when the \var{hkey} object is - destroyed by Python. + Note that if \var{hkey} is not closed using this method (or via + \method{handle.Close()}), it is closed when the \var{hkey} object + is destroyed by Python. 
\end{funcdesc} From python-checkins at python.org Sun Jan 22 17:11:01 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 22 Jan 2006 17:11:01 +0100 (CET) Subject: [Python-checkins] r42140 - python/trunk/Doc/lib/libfuture.tex Message-ID: <20060122161101.BFCE91E4002@bag.python.org> Author: georg.brandl Date: Sun Jan 22 17:11:01 2006 New Revision: 42140 Modified: python/trunk/Doc/lib/libfuture.tex Log: Typo Modified: python/trunk/Doc/lib/libfuture.tex ============================================================================== --- python/trunk/Doc/lib/libfuture.tex (original) +++ python/trunk/Doc/lib/libfuture.tex Sun Jan 22 17:11:01 2006 @@ -64,6 +64,6 @@ \var{CompilerFlag} is the (bitfield) flag that should be passed in the fourth argument to the builtin function \function{compile()} to enable the feature in dynamically compiled code. This flag is stored in the -\member{compiler_flag} attribute on \class{_Future} instances. +\member{compiler_flag} attribute on \class{_Feature} instances. No feature description will ever be deleted from \module{__future__}. 
From python-checkins at python.org Sun Jan 22 17:11:04 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 22 Jan 2006 17:11:04 +0100 (CET) Subject: [Python-checkins] r42141 - python/branches/release24-maint/Doc/lib/libfuture.tex Message-ID: <20060122161104.EBDEE1E4002@bag.python.org> Author: georg.brandl Date: Sun Jan 22 17:11:04 2006 New Revision: 42141 Modified: python/branches/release24-maint/Doc/lib/libfuture.tex Log: Typo Modified: python/branches/release24-maint/Doc/lib/libfuture.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libfuture.tex (original) +++ python/branches/release24-maint/Doc/lib/libfuture.tex Sun Jan 22 17:11:04 2006 @@ -64,6 +64,6 @@ \var{CompilerFlag} is the (bitfield) flag that should be passed in the fourth argument to the builtin function \function{compile()} to enable the feature in dynamically compiled code. This flag is stored in the -\member{compiler_flag} attribute on \class{_Future} instances. +\member{compiler_flag} attribute on \class{_Feature} instances. No feature description will ever be deleted from \module{__future__}. From python-checkins at python.org Sun Jan 22 20:35:01 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 22 Jan 2006 20:35:01 +0100 (CET) Subject: [Python-checkins] r42142 - python/trunk/Doc/lib/libos.tex Message-ID: <20060122193501.1448B1E4002@bag.python.org> Author: georg.brandl Date: Sun Jan 22 20:34:59 2006 New Revision: 42142 Modified: python/trunk/Doc/lib/libos.tex Log: Correct several typos in libos.tex. Bug #1412227. 
Modified: python/trunk/Doc/lib/libos.tex ============================================================================== --- python/trunk/Doc/lib/libos.tex (original) +++ python/trunk/Doc/lib/libos.tex Sun Jan 22 20:34:59 2006 @@ -418,7 +418,7 @@ (Note that \code{\var{child_stdin}, \var{child_stdout}, and \var{child_stderr}} are named from the point of view of the child -process, i.e. \var{child_stdin} is the child's standard input.) +process, so \var{child_stdin} is the child's standard input.) This functionality is also available in the \refmodule{popen2} module using functions of the same names, but the return values of those @@ -1008,7 +1008,7 @@ \versionchanged [If \function{stat_float_times} returns true, the time values are floats, measuring seconds. Fractions of a second may be reported if the system supports that. On Mac OS, the times are always -floats. See \function{stat_float_times} for further discussion. ]{2.3} +floats. See \function{stat_float_times} for further discussion]{2.3} On some \UNIX{} systems (such as Linux), the following attributes may also be available: @@ -1018,8 +1018,8 @@ \member{st_flags} (user defined flags for file). On other \UNIX{} systems (such as FreeBSD), the following attributes -may be available (but may be only filled out of root tries to -use them: +may be available (but may be only filled out if root tries to +use them): \member{st_gen} (file generation number), \member{st_birthtime} (time of file creation). @@ -1069,9 +1069,9 @@ \begin{funcdesc}{stat_float_times}{\optional{newvalue}} Determine whether \class{stat_result} represents time stamps as float -objects. If newval is True, future calls to stat() return floats, if -it is False, future calls return ints. If newval is omitted, return -the current setting. +objects. If \var{newvalue} is \code{True}, future calls to \function{stat()} +return floats, if it is \code{False}, future calls return ints. +If \var{newvalue} is omitted, return the current setting. 
For compatibility with older Python versions, accessing \class{stat_result} as a tuple always returns integers. @@ -1080,7 +1080,7 @@ which do not work correctly with floating point time stamps can use this function to restore the old behaviour]{2.5} -The resolution of the timestamps (i.e. the smallest possible fraction) +The resolution of the timestamps (that is the smallest possible fraction) depends on the system. Some systems only support second resolution; on these systems, the fraction will always be zero. @@ -1224,7 +1224,7 @@ By default errors from the \code{os.listdir()} call are ignored. If optional argument \var{onerror} is specified, it should be a function; -it will be called with one argument, an os.error instance. It can +it will be called with one argument, an \exception{OSError} instance. It can report the error to continue with the walk, or raise the exception to abort the walk. Note that the filename is available as the \code{filename} attribute of the exception object. @@ -1833,8 +1833,8 @@ \begin{funcdesc}{getloadavg}{} Return the number of processes in the system run queue averaged over -the last 1, 5, and 15 minutes or raises OSError if the load average -was unobtainable. +the last 1, 5, and 15 minutes or raises \exception{OSError} if the load +average was unobtainable. \versionadded{2.3} \end{funcdesc} From python-checkins at python.org Sun Jan 22 20:35:05 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 22 Jan 2006 20:35:05 +0100 (CET) Subject: [Python-checkins] r42143 - python/branches/release24-maint/Doc/lib/libos.tex Message-ID: <20060122193505.B1A9B1E400D@bag.python.org> Author: georg.brandl Date: Sun Jan 22 20:35:04 2006 New Revision: 42143 Modified: python/branches/release24-maint/Doc/lib/libos.tex Log: Correct several typos in libos.tex. Bug #1412227. 
Modified: python/branches/release24-maint/Doc/lib/libos.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libos.tex (original) +++ python/branches/release24-maint/Doc/lib/libos.tex Sun Jan 22 20:35:04 2006 @@ -418,7 +418,7 @@ (Note that \code{\var{child_stdin}, \var{child_stdout}, and \var{child_stderr}} are named from the point of view of the child -process, i.e. \var{child_stdin} is the child's standard input.) +process, so \var{child_stdin} is the child's standard input.) This functionality is also available in the \refmodule{popen2} module using functions of the same names, but the return values of those @@ -978,7 +978,7 @@ \versionchanged [If \function{stat_float_times} returns true, the time values are floats, measuring seconds. Fractions of a second may be reported if the system supports that. On Mac OS, the times are always -floats. See \function{stat_float_times} for further discussion. ]{2.3} +floats. See \function{stat_float_times} for further discussion]{2.3} On some Unix systems (such as Linux), the following attributes may also be available: @@ -1031,9 +1031,9 @@ \begin{funcdesc}{stat_float_times}{\optional{newvalue}} Determine whether \class{stat_result} represents time stamps as float -objects. If newval is True, future calls to stat() return floats, if -it is False, future calls return ints. If newval is omitted, return -the current setting. +objects. If \var{newvalue} is \code{True}, future calls to \function{stat()} +return floats, if it is \code{False}, future calls return ints. +If \var{newvalue} is omitted, return the current setting. For compatibility with older Python versions, accessing \class{stat_result} as a tuple always returns integers. For @@ -1187,7 +1187,7 @@ By default errors from the \code{os.listdir()} call are ignored. If optional argument \var{onerror} is specified, it should be a function; -it will be called with one argument, an os.error instance. 
It can +it will be called with one argument, an \exception{OSError} instance. It can report the error to continue with the walk, or raise the exception to abort the walk. Note that the filename is available as the \code{filename} attribute of the exception object. @@ -1796,8 +1796,8 @@ \begin{funcdesc}{getloadavg}{} Return the number of processes in the system run queue averaged over -the last 1, 5, and 15 minutes or raises OSError if the load average -was unobtainable. +the last 1, 5, and 15 minutes or raises \exception{OSError} if the load +average was unobtainable. \versionadded{2.3} \end{funcdesc} From python-checkins at python.org Sun Jan 22 20:38:05 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 22 Jan 2006 20:38:05 +0100 (CET) Subject: [Python-checkins] r42144 - python/trunk/Doc/lib/libtime.tex Message-ID: <20060122193805.7A4BF1E4002@bag.python.org> Author: georg.brandl Date: Sun Jan 22 20:38:05 2006 New Revision: 42144 Modified: python/trunk/Doc/lib/libtime.tex Log: typo Modified: python/trunk/Doc/lib/libtime.tex ============================================================================== --- python/trunk/Doc/lib/libtime.tex (original) +++ python/trunk/Doc/lib/libtime.tex Sun Jan 22 20:38:05 2006 @@ -225,7 +225,8 @@ if any field in \var{t} is outside of the allowed range. \versionchanged[Allowed \var{t} to be omitted]{2.1} \versionchanged[\exception{ValueError} raised if a field in \var{t} is -out of range.]{2.4} +out of range]{2.4} + The following directives can be embedded in the \var{format} string. 
They are shown without the optional field width and precision From python-checkins at python.org Sun Jan 22 20:38:08 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 22 Jan 2006 20:38:08 +0100 (CET) Subject: [Python-checkins] r42145 - python/branches/release24-maint/Doc/lib/libtime.tex Message-ID: <20060122193808.2072E1E4002@bag.python.org> Author: georg.brandl Date: Sun Jan 22 20:38:07 2006 New Revision: 42145 Modified: python/branches/release24-maint/Doc/lib/libtime.tex Log: typo Modified: python/branches/release24-maint/Doc/lib/libtime.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libtime.tex (original) +++ python/branches/release24-maint/Doc/lib/libtime.tex Sun Jan 22 20:38:07 2006 @@ -225,7 +225,8 @@ if any field in \var{t} is outside of the allowed range. \versionchanged[Allowed \var{t} to be omitted]{2.1} \versionchanged[\exception{ValueError} raised if a field in \var{t} is -out of range.]{2.4} +out of range]{2.4} + The following directives can be embedded in the \var{format} string. They are shown without the optional field width and precision From python-checkins at python.org Sun Jan 22 21:47:27 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 22 Jan 2006 21:47:27 +0100 (CET) Subject: [Python-checkins] r42146 - python/trunk/Doc/lib/libos.tex Message-ID: <20060122204727.55C631E4002@bag.python.org> Author: georg.brandl Date: Sun Jan 22 21:47:26 2006 New Revision: 42146 Modified: python/trunk/Doc/lib/libos.tex Log: Another typo. Modified: python/trunk/Doc/lib/libos.tex ============================================================================== --- python/trunk/Doc/lib/libos.tex (original) +++ python/trunk/Doc/lib/libos.tex Sun Jan 22 21:47:26 2006 @@ -1220,7 +1220,7 @@ about directories the caller creates or renames before it resumes \function{walk()} again. 
Modifying \var{dirnames} when \var{topdown} is false is ineffective, because in bottom-up mode the directories in -\var{dirnames} are generated before \var{dirnames} itself is generated. +\var{dirnames} are generated before \var{dirpath} itself is generated. By default errors from the \code{os.listdir()} call are ignored. If optional argument \var{onerror} is specified, it should be a function; From python-checkins at python.org Sun Jan 22 21:47:31 2006 From: python-checkins at python.org (georg.brandl) Date: Sun, 22 Jan 2006 21:47:31 +0100 (CET) Subject: [Python-checkins] r42147 - python/branches/release24-maint/Doc/lib/libos.tex Message-ID: <20060122204731.5AD9C1E4002@bag.python.org> Author: georg.brandl Date: Sun Jan 22 21:47:30 2006 New Revision: 42147 Modified: python/branches/release24-maint/Doc/lib/libos.tex Log: Another typo. Modified: python/branches/release24-maint/Doc/lib/libos.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libos.tex (original) +++ python/branches/release24-maint/Doc/lib/libos.tex Sun Jan 22 21:47:30 2006 @@ -1183,7 +1183,7 @@ about directories the caller creates or renames before it resumes \function{walk()} again. Modifying \var{dirnames} when \var{topdown} is false is ineffective, because in bottom-up mode the directories in -\var{dirnames} are generated before \var{dirnames} itself is generated. +\var{dirnames} are generated before \var{dirpath} itself is generated. By default errors from the \code{os.listdir()} call are ignored. 
If optional argument \var{onerror} is specified, it should be a function; From python-checkins at python.org Sun Jan 22 23:22:45 2006 From: python-checkins at python.org (phillip.eby) Date: Sun, 22 Jan 2006 23:22:45 +0100 (CET) Subject: [Python-checkins] r42148 - sandbox/trunk/setuptools/setuptools/command/easy_install.py Message-ID: <20060122222245.390FE1E4002@bag.python.org> Author: phillip.eby Date: Sun Jan 22 23:22:44 2006 New Revision: 42148 Modified: sandbox/trunk/setuptools/setuptools/command/easy_install.py Log: Fix typo. Modified: sandbox/trunk/setuptools/setuptools/command/easy_install.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/easy_install.py (original) +++ sandbox/trunk/setuptools/setuptools/command/easy_install.py Sun Jan 22 23:22:44 2006 @@ -772,7 +772,7 @@ If it uses setuptools in its setup script, you can activate it in "development" mode by going to that directory and running:: - %(python)s setup.py --develop + %(python)s setup.py develop See the setuptools documentation for the "develop" command for more info. """ % locals() From python-checkins at python.org Mon Jan 23 08:25:30 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 23 Jan 2006 08:25:30 +0100 (CET) Subject: [Python-checkins] r42149 - python/trunk/Mac/Modules/file/_Filemodule.c Message-ID: <20060123072530.705231E4039@bag.python.org> Author: neal.norwitz Date: Mon Jan 23 08:25:29 2006 New Revision: 42149 Modified: python/trunk/Mac/Modules/file/_Filemodule.c Log: This should fix a mem leak on the Mac. Brett tested it. 
Modified: python/trunk/Mac/Modules/file/_Filemodule.c ============================================================================== --- python/trunk/Mac/Modules/file/_Filemodule.c (original) +++ python/trunk/Mac/Modules/file/_Filemodule.c Mon Jan 23 08:25:29 2006 @@ -3204,11 +3204,10 @@ char *path = NULL; if (!PyArg_Parse(v, "et", Py_FileSystemDefaultEncoding, &path)) return 0; - if ( (err=FSPathMakeRef(path, fsr, NULL)) ) { + if ( (err=FSPathMakeRef(path, fsr, NULL)) ) PyMac_Error(err); - return 0; - } - return 1; + PyMem_Free(path); + return !err; } /* XXXX Should try unicode here too */ /* Otherwise we try to go via an FSSpec */ From python-checkins at python.org Mon Jan 23 08:37:28 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 23 Jan 2006 08:37:28 +0100 (CET) Subject: [Python-checkins] r42150 - python/branches/release24-maint/Mac/Modules/file/_Filemodule.c Message-ID: <20060123073728.AF5511E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 23 08:37:28 2006 New Revision: 42150 Modified: python/branches/release24-maint/Mac/Modules/file/_Filemodule.c Log: Backport: This should fix a mem leak on the Mac. Brett tested it. 
Modified: python/branches/release24-maint/Mac/Modules/file/_Filemodule.c ============================================================================== --- python/branches/release24-maint/Mac/Modules/file/_Filemodule.c (original) +++ python/branches/release24-maint/Mac/Modules/file/_Filemodule.c Mon Jan 23 08:37:28 2006 @@ -3202,9 +3202,9 @@ return NULL; if ( (err=FSPathMakeRef(path, fsr, NULL)) ) { PyMac_Error(err); - return 0; } - return 1; + PyMem_Free(path); + return !err; } /* XXXX Should try unicode here too */ /* Otherwise we try to go via an FSSpec */ From python-checkins at python.org Mon Jan 23 08:49:39 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 23 Jan 2006 08:49:39 +0100 (CET) Subject: [Python-checkins] r42151 - python/trunk/Lib/test/test_symtable.py Message-ID: <20060123074939.62D321E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 23 08:49:36 2006 New Revision: 42151 Modified: python/trunk/Lib/test/test_symtable.py Log: Use unittest and make sure a few other cases don't crash Modified: python/trunk/Lib/test/test_symtable.py ============================================================================== --- python/trunk/Lib/test/test_symtable.py (original) +++ python/trunk/Lib/test/test_symtable.py Mon Jan 23 08:49:36 2006 @@ -1,8 +1,8 @@ -from test.test_support import vereq, TestFailed +from test import test_support import symtable +import unittest -symbols = symtable.symtable("def f(x): return x", "?", "exec") ## XXX ## Test disabled because symtable module needs to be rewritten for new compiler @@ -21,3 +21,24 @@ ## raise TestFailed("no SyntaxError for %r" % (brokencode,)) ##checkfilename("def f(x): foo)(") # parse-time ##checkfilename("def f(x): global x") # symtable-build-time + +class SymtableTest(unittest.TestCase): + def test_invalid_args(self): + self.assertRaises(TypeError, symtable.symtable, "42") + self.assertRaises(ValueError, symtable.symtable, "42", "?", "") + + def test_eval(self): + symbols = 
symtable.symtable("42", "?", "eval") + + def test_single(self): + symbols = symtable.symtable("42", "?", "single") + + def test_exec(self): + symbols = symtable.symtable("def f(x): return x", "?", "exec") + + +def test_main(): + test_support.run_unittest(SymtableTest) + +if __name__ == '__main__': + test_main() From python-checkins at python.org Mon Jan 23 08:50:07 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 23 Jan 2006 08:50:07 +0100 (CET) Subject: [Python-checkins] r42152 - python/trunk/Lib/test/test_signal.py Message-ID: <20060123075007.5651C1E4005@bag.python.org> Author: neal.norwitz Date: Mon Jan 23 08:50:06 2006 New Revision: 42152 Modified: python/trunk/Lib/test/test_signal.py Log: Test getsignal() and some error conditions Modified: python/trunk/Lib/test/test_signal.py ============================================================================== --- python/trunk/Lib/test/test_signal.py (original) +++ python/trunk/Lib/test/test_signal.py Mon Jan 23 08:50:06 2006 @@ -1,5 +1,5 @@ # Test the signal module -from test.test_support import verbose, TestSkipped, TestFailed +from test.test_support import verbose, TestSkipped, TestFailed, vereq import signal import os, sys, time @@ -43,6 +43,28 @@ usr2 = signal.signal(signal.SIGUSR2, signal.SIG_IGN) alrm = signal.signal(signal.SIGALRM, signal.default_int_handler) +vereq(signal.getsignal(signal.SIGHUP), handlerA) +vereq(signal.getsignal(signal.SIGUSR1), handlerB) +vereq(signal.getsignal(signal.SIGUSR2), signal.SIG_IGN) + +try: + signal.signal(4242, handlerB) + raise TestFailed, 'expected ValueError for invalid signal # to signal()' +except ValueError: + pass + +try: + signal.getsignal(4242) + raise TestFailed, 'expected ValueError for invalid signal # to getsignal()' +except ValueError: + pass + +try: + signal.signal(signal.SIGUSR1, None) + raise TestFailed, 'expected TypeError for non-callable' +except TypeError: + pass + try: os.system(script) From python-checkins at python.org Mon Jan 23 
08:51:28 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 23 Jan 2006 08:51:28 +0100 (CET) Subject: [Python-checkins] r42153 - python/trunk/Lib/test/test_support.py Message-ID: <20060123075128.DDA4D1E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 23 08:51:27 2006 New Revision: 42153 Modified: python/trunk/Lib/test/test_support.py Log: Convenience function to remove a possibly nonexistent file Modified: python/trunk/Lib/test/test_support.py ============================================================================== --- python/trunk/Lib/test/test_support.py (original) +++ python/trunk/Lib/test/test_support.py Mon Jan 23 08:51:27 2006 @@ -49,23 +49,24 @@ except KeyError: pass +def unlink(filename): + import os + try: + os.unlink(filename) + except OSError: + pass + def forget(modname): '''"Forget" a module was ever imported by removing it from sys.modules and deleting any .pyc and .pyo files.''' unload(modname) import os for dirname in sys.path: - try: - os.unlink(os.path.join(dirname, modname + os.extsep + 'pyc')) - except os.error: - pass + unlink(os.path.join(dirname, modname + os.extsep + 'pyc')) # Deleting the .pyo file cannot be within the 'try' for the .pyc since # the chance exists that there is no .pyc (and thus the 'try' statement # is exited) but there is a .pyo file. - try: - os.unlink(os.path.join(dirname, modname + os.extsep + 'pyo')) - except os.error: - pass + unlink(os.path.join(dirname, modname + os.extsep + 'pyo')) def is_resource_enabled(resource): """Test whether a resource is enabled. Known resources are set by @@ -175,14 +176,9 @@ (TESTFN, TMP_TESTFN)) if fp is not None: fp.close() - try: - os.unlink(TESTFN) - except: - pass + unlink(TESTFN) del os, fp -from os import unlink - def findfile(file, here=__file__): """Try to find a file on sys.path and the working directory. 
If it is not found the argument passed to the function is returned (this does not From python-checkins at python.org Mon Jan 23 08:52:13 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 23 Jan 2006 08:52:13 +0100 (CET) Subject: [Python-checkins] r42154 - python/trunk/Lib/test/test_zipimport.py Message-ID: <20060123075213.8CFFB1E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 23 08:52:13 2006 New Revision: 42154 Modified: python/trunk/Lib/test/test_zipimport.py Log: Test zipimporter a bit more. Also get working with -R :: option for finding ref leaks Modified: python/trunk/Lib/test/test_zipimport.py ============================================================================== --- python/trunk/Lib/test/test_zipimport.py (original) +++ python/trunk/Lib/test/test_zipimport.py Mon Jan 23 08:52:13 2006 @@ -4,6 +4,7 @@ import imp import struct import time +import unittest import zlib # implied prerequisite from zipfile import ZipFile, ZipInfo, ZIP_STORED, ZIP_DEFLATED @@ -13,6 +14,11 @@ import zipimport +# so we only run testAFakeZlib once if this test is run repeatedly +# which happens when we look for ref leaks +test_imported = False + + def make_pyc(co, mtime): data = marshal.dumps(co) if type(mtime) is type(0.0): @@ -176,6 +182,37 @@ packdir2 + TESTMOD + pyc_ext: (NOW, test_pyc)} self.doTest(pyc_ext, files, TESTPACK, TESTPACK2, TESTMOD) + def testZipImporterMethods(self): + packdir = TESTPACK + os.sep + packdir2 = packdir + TESTPACK2 + os.sep + files = {packdir + "__init__" + pyc_ext: (NOW, test_pyc), + packdir2 + "__init__" + pyc_ext: (NOW, test_pyc), + packdir2 + TESTMOD + pyc_ext: (NOW, test_pyc)} + + z = ZipFile(TEMP_ZIP, "w") + try: + for name, (mtime, data) in files.items(): + zinfo = ZipInfo(name, time.localtime(mtime)) + zinfo.compress_type = self.compression + z.writestr(zinfo, data) + z.close() + + zi = zipimport.zipimporter(TEMP_ZIP) + self.assertEquals(zi.is_package(TESTPACK), True) + zi.load_module(TESTPACK) + + 
self.assertEquals(zi.is_package(packdir + '__init__'), False) + self.assertEquals(zi.is_package(packdir + TESTPACK2), True) + self.assertEquals(zi.is_package(packdir2 + TESTMOD), False) + + mod_name = packdir2 + TESTMOD + mod = __import__(mod_name.replace('/', '.')) + self.assertEquals(zi.get_source(TESTPACK), None) + self.assertEquals(zi.get_source(mod_name), None) + finally: + z.close() + os.remove(TEMP_ZIP) + def testGetData(self): z = ZipFile(TEMP_ZIP, "w") z.compression = self.compression @@ -186,6 +223,7 @@ z.close() zi = zipimport.zipimporter(TEMP_ZIP) self.assertEquals(data, zi.get_data(name)) + self.assert_('zipimporter object' in repr(zi)) finally: z.close() os.remove(TEMP_ZIP) @@ -212,11 +250,91 @@ compression = ZIP_DEFLATED +class BadFileZipImportTestCase(unittest.TestCase): + def assertZipFailure(self, filename): + self.assertRaises(zipimport.ZipImportError, + zipimport.zipimporter, filename) + + def testNoFile(self): + self.assertZipFailure('AdfjdkFJKDFJjdklfjs') + + def testEmptyFilename(self): + self.assertZipFailure('') + + def testBadArgs(self): + self.assertRaises(TypeError, zipimport.zipimporter, None) + self.assertRaises(TypeError, zipimport.zipimporter, TESTMOD, kwd=None) + + def testFilenameTooLong(self): + self.assertZipFailure('A' * 33000) + + def testEmptyFile(self): + test_support.unlink(TESTMOD) + open(TESTMOD, 'w+').close() + self.assertZipFailure(TESTMOD) + + def testFileUnreadable(self): + test_support.unlink(TESTMOD) + fd = os.open(TESTMOD, os.O_CREAT, 000) + os.close(fd) + self.assertZipFailure(TESTMOD) + + def testNotZipFile(self): + test_support.unlink(TESTMOD) + fp = open(TESTMOD, 'w+') + fp.write('a' * 22) + fp.close() + self.assertZipFailure(TESTMOD) + + def testBogusZipFile(self): + test_support.unlink(TESTMOD) + fp = open(TESTMOD, 'w+') + fp.write(struct.pack('=I', 0x06054B50)) + fp.write('a' * 18) + fp.close() + z = zipimport.zipimporter(TESTMOD) + + try: + self.assertRaises(TypeError, z.find_module, None) + 
self.assertRaises(TypeError, z.load_module, None) + self.assertRaises(TypeError, z.is_package, None) + self.assertRaises(TypeError, z.get_code, None) + self.assertRaises(TypeError, z.get_data, None) + self.assertRaises(TypeError, z.get_source, None) + + error = zipimport.ZipImportError + self.assertEqual(z.find_module('abc'), None) + + self.assertRaises(error, z.load_module, 'abc') + self.assertRaises(error, z.get_code, 'abc') + self.assertRaises(IOError, z.get_data, 'abc') + self.assertRaises(error, z.get_source, 'abc') + self.assertRaises(error, z.is_package, 'abc') + finally: + zipimport._zip_directory_cache.clear() + + +def cleanup(): + # this is necessary if test is run repeated (like when finding leaks) + global test_imported + if test_imported: + zipimport._zip_directory_cache.clear() + if hasattr(UncompressedZipImportTestCase, 'testAFakeZlib'): + delattr(UncompressedZipImportTestCase, 'testAFakeZlib') + if hasattr(CompressedZipImportTestCase, 'testAFakeZlib'): + delattr(CompressedZipImportTestCase, 'testAFakeZlib') + test_imported = True + def test_main(): - test_support.run_unittest( - UncompressedZipImportTestCase, - CompressedZipImportTestCase - ) + cleanup() + try: + test_support.run_unittest( + UncompressedZipImportTestCase, + CompressedZipImportTestCase, + BadFileZipImportTestCase, + ) + finally: + test_support.unlink(TESTMOD) if __name__ == "__main__": test_main() From python-checkins at python.org Mon Jan 23 09:48:04 2006 From: python-checkins at python.org (neal.norwitz) Date: Mon, 23 Jan 2006 09:48:04 +0100 (CET) Subject: [Python-checkins] r42155 - python/trunk/Lib/test/test_zipimport.py Message-ID: <20060123084804.85FEB1E4002@bag.python.org> Author: neal.norwitz Date: Mon Jan 23 09:48:03 2006 New Revision: 42155 Modified: python/trunk/Lib/test/test_zipimport.py Log: Disable this test until I can test on big-endian machines and get passing Modified: python/trunk/Lib/test/test_zipimport.py 
============================================================================== --- python/trunk/Lib/test/test_zipimport.py (original) +++ python/trunk/Lib/test/test_zipimport.py Mon Jan 23 09:48:03 2006 @@ -286,7 +286,8 @@ fp.close() self.assertZipFailure(TESTMOD) - def testBogusZipFile(self): + # XXX: disabled until this works on Big-endian machines + def _testBogusZipFile(self): test_support.unlink(TESTMOD) fp = open(TESTMOD, 'w+') fp.write(struct.pack('=I', 0x06054B50)) From jimjjewett at gmail.com Mon Jan 23 16:47:50 2006 From: jimjjewett at gmail.com (Jim Jewett) Date: Mon, 23 Jan 2006 10:47:50 -0500 Subject: [Python-checkins] r42145 - python/branches/release24-maint/Doc/lib/libtime.tex In-Reply-To: <20060122193808.2072E1E4002@bag.python.org> References: <20060122193808.2072E1E4002@bag.python.org> Message-ID: Thank you for all your attention to the documents. -jJ From jimjjewett at gmail.com Mon Jan 23 16:56:55 2006 From: jimjjewett at gmail.com (Jim Jewett) Date: Mon, 23 Jan 2006 10:56:55 -0500 Subject: [Python-checkins] r42135 - sandbox/trunk/pycon/parse-sched.py In-Reply-To: <20060121141416.E1B8F1E4002@bag.python.org> References: <20060121141416.E1B8F1E4002@bag.python.org> Message-ID: Am I missing something, or is the second "span < len(time_list)" redundant? On 1/21/06, andrew.kuchling wrote: > Author: andrew.kuchling > Date: Sat Jan 21 15:14:16 2006 > New Revision: 42135 > > Modified: > sandbox/trunk/pycon/parse-sched.py > Log: > Change #! 
line; fix bug in determining row span (code would walk off the end of the list) > > Modified: sandbox/trunk/pycon/parse-sched.py > ============================================================================== > --- sandbox/trunk/pycon/parse-sched.py (original) > +++ sandbox/trunk/pycon/parse-sched.py Sat Jan 21 15:14:16 2006 > @@ -1,4 +1,4 @@ > -#!/usr/bin/python > +#!/usr/bin/env python > > # Reads a page with a wiki-format table > > @@ -93,8 +93,11 @@ > if len(time_list) == 0: > return 1 > span = 0 > - while (time_list[span][0] < end_time and span < len(time_list)): > + while (span < len(time_list) and > + (time_list[span][0] < end_time and span < len(time_list))): > span += 1 > + if span == len(time_list): > + return 1 > return span+1 > > > _______________________________________________ > Python-checkins mailing list > Python-checkins at python.org > http://mail.python.org/mailman/listinfo/python-checkins > From python-checkins at python.org Mon Jan 23 17:29:16 2006 From: python-checkins at python.org (phillip.eby) Date: Mon, 23 Jan 2006 17:29:16 +0100 (CET) Subject: [Python-checkins] r42156 - sandbox/trunk/setuptools/setuptools/package_index.py Message-ID: <20060123162916.B5A5F1E4003@bag.python.org> Author: phillip.eby Date: Mon Jan 23 17:29:16 2006 New Revision: 42156 Modified: sandbox/trunk/setuptools/setuptools/package_index.py Log: Randomly select a SourceForge mirror IP for each download, to work around too-aggressive DNS caches on some platforms, that could otherwise result in a stuck bad IP. 
Modified: sandbox/trunk/setuptools/setuptools/package_index.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/package_index.py (original) +++ sandbox/trunk/setuptools/setuptools/package_index.py Mon Jan 23 17:29:16 2006 @@ -1,6 +1,6 @@ """PyPI and direct package downloading""" -import sys, os.path, re, urlparse, urllib2, shutil +import sys, os.path, re, urlparse, urllib2, shutil, random, socket from pkg_resources import * from distutils import log from distutils.errors import DistutilsError @@ -562,18 +562,28 @@ log.warn(msg, *args) + + + + + + + + + + def fix_sf_url(url): scheme, server, path, param, query, frag = urlparse.urlparse(url) if server!='prdownloads.sourceforge.net': return url return urlparse.urlunparse( - (scheme, 'dl.sourceforge.net', 'sourceforge'+path, param, '', frag) + (scheme, get_sf_ip(), 'sourceforge'+path, param, '', frag) ) - - - - +def get_sf_ip(_mirrors=[]): + if not _mirrors: + _mirrors[:] = socket.gethostbyname_ex('dl.sourceforge.net')[-1] + return random.choice(_mirrors) From python-checkins at python.org Mon Jan 23 20:53:37 2006 From: python-checkins at python.org (andrew.kuchling) Date: Mon, 23 Jan 2006 20:53:37 +0100 (CET) Subject: [Python-checkins] r42157 - sandbox/trunk/pycon/parse-sched.py Message-ID: <20060123195337.3BE391E4002@bag.python.org> Author: andrew.kuchling Date: Mon Jan 23 20:53:34 2006 New Revision: 42157 Modified: sandbox/trunk/pycon/parse-sched.py Log: Remove redundant len() Modified: sandbox/trunk/pycon/parse-sched.py ============================================================================== --- sandbox/trunk/pycon/parse-sched.py (original) +++ sandbox/trunk/pycon/parse-sched.py Mon Jan 23 20:53:34 2006 @@ -93,8 +93,7 @@ if len(time_list) == 0: return 1 span = 0 - while (span < len(time_list) and - (time_list[span][0] < end_time and span < len(time_list))): + while (span < len(time_list) and time_list[span][0] < end_time): span += 1 if span 
== len(time_list): return 1 From python-checkins at python.org Mon Jan 23 21:49:25 2006 From: python-checkins at python.org (georg.brandl) Date: Mon, 23 Jan 2006 21:49:25 +0100 (CET) Subject: [Python-checkins] r42158 - python/trunk/Doc/lib/liburlparse.tex Message-ID: <20060123204925.371AF1E4002@bag.python.org> Author: georg.brandl Date: Mon Jan 23 21:49:24 2006 New Revision: 42158 Modified: python/trunk/Doc/lib/liburlparse.tex Log: Add \versionadded for sftp scheme Modified: python/trunk/Doc/lib/liburlparse.tex ============================================================================== --- python/trunk/Doc/lib/liburlparse.tex (original) +++ python/trunk/Doc/lib/liburlparse.tex Mon Jan 23 21:49:24 2006 @@ -25,6 +25,7 @@ \code{nntp}, \code{prospero}, \code{rsync}, \code{rtsp}, \code{rtspu}, \code{sftp}, \code{shttp}, \code{sip}, \code{snews}, \code{svn}, \code{svn+ssh}, \code{telnet}, \code{wais}. +\versionadded[Support for the \code{sftp} scheme]{2.5} The \module{urlparse} module defines the following functions: From python-checkins at python.org Mon Jan 23 22:28:43 2006 From: python-checkins at python.org (tim.peters) Date: Mon, 23 Jan 2006 22:28:43 +0100 (CET) Subject: [Python-checkins] r42159 - python/trunk/Lib/test/test_zipimport.py Message-ID: <20060123212843.3A2551E4002@bag.python.org> Author: tim.peters Date: Mon Jan 23 22:28:42 2006 New Revision: 42159 Modified: python/trunk/Lib/test/test_zipimport.py Log: Whitespace normalization. 
Modified: python/trunk/Lib/test/test_zipimport.py ============================================================================== --- python/trunk/Lib/test/test_zipimport.py (original) +++ python/trunk/Lib/test/test_zipimport.py Mon Jan 23 22:28:42 2006 @@ -200,7 +200,7 @@ zi = zipimport.zipimporter(TEMP_ZIP) self.assertEquals(zi.is_package(TESTPACK), True) zi.load_module(TESTPACK) - + self.assertEquals(zi.is_package(packdir + '__init__'), False) self.assertEquals(zi.is_package(packdir + TESTPACK2), True) self.assertEquals(zi.is_package(packdir2 + TESTMOD), False) From python-checkins at python.org Mon Jan 23 22:31:00 2006 From: python-checkins at python.org (georg.brandl) Date: Mon, 23 Jan 2006 22:31:00 +0100 (CET) Subject: [Python-checkins] r42160 - python/trunk/Tools/scripts/texcheck.py Message-ID: <20060123213100.DE8CB1E4004@bag.python.org> Author: georg.brandl Date: Mon Jan 23 22:31:00 2006 New Revision: 42160 Modified: python/trunk/Tools/scripts/texcheck.py Log: Add two missing markup tags. 
Modified: python/trunk/Tools/scripts/texcheck.py ============================================================================== --- python/trunk/Tools/scripts/texcheck.py (original) +++ python/trunk/Tools/scripts/texcheck.py Mon Jan 23 22:31:00 2006 @@ -57,7 +57,7 @@ \email \kwindex \refexmodindex \filenq \e \menuselection \exindex \linev \newsgroup \verbatim \setshortversion \author \authoraddress \paragraph \subparagraph \cmemberline - \textbar + \textbar \C \seelink """ def matchclose(c_lineno, c_symbol, openers, pairmap): From python-checkins at python.org Mon Jan 23 22:33:04 2006 From: python-checkins at python.org (georg.brandl) Date: Mon, 23 Jan 2006 22:33:04 +0100 (CET) Subject: [Python-checkins] r42161 - python/trunk/Doc/lib/libcookie.tex python/trunk/Doc/lib/libdl.tex python/trunk/Doc/lib/libmmap.tex python/trunk/Doc/lib/libwinreg.tex Message-ID: <20060123213304.603211E4004@bag.python.org> Author: georg.brandl Date: Mon Jan 23 22:33:03 2006 New Revision: 42161 Modified: python/trunk/Doc/lib/libcookie.tex python/trunk/Doc/lib/libdl.tex python/trunk/Doc/lib/libmmap.tex python/trunk/Doc/lib/libwinreg.tex Log: Correct typos and markup errors (Raymond, thanks for texcheck) Modified: python/trunk/Doc/lib/libcookie.tex ============================================================================== --- python/trunk/Doc/lib/libcookie.tex (original) +++ python/trunk/Doc/lib/libcookie.tex Mon Jan 23 22:33:03 2006 @@ -68,7 +68,7 @@ \begin{seealso} - \seemodule{cookielib}{HTTP cookie handling for for web + \seemodule{cookielib}{HTTP cookie handling for web \emph{clients}. The \module{cookielib} and \module{Cookie} modules do not depend on each other.} Modified: python/trunk/Doc/lib/libdl.tex ============================================================================== --- python/trunk/Doc/lib/libdl.tex (original) +++ python/trunk/Doc/lib/libdl.tex Mon Jan 23 22:33:03 2006 @@ -27,7 +27,7 @@ (\constant{RTLD_NOW}). Default is \constant{RTLD_LAZY}. 
Note that some systems do not support \constant{RTLD_NOW}. -Return value is a \pytype{dlobject}. +Return value is a \class{dlobject}. \end{funcdesc} The \module{dl} module defines the following constants: Modified: python/trunk/Doc/lib/libmmap.tex ============================================================================== --- python/trunk/Doc/lib/libmmap.tex (original) +++ python/trunk/Doc/lib/libmmap.tex Mon Jan 23 22:33:03 2006 @@ -63,7 +63,7 @@ \strong{(\UNIX{} version)} Maps \var{length} bytes from the file specified by the file descriptor \var{fileno}, and returns a mmap object. If \var{length} is \code{0}, the maximum length of the map - will be the current size of the file when \function{mmap(} is + will be the current size of the file when \function{mmap()} is called. \var{flags} specifies the nature of the mapping. Modified: python/trunk/Doc/lib/libwinreg.tex ============================================================================== --- python/trunk/Doc/lib/libwinreg.tex (original) +++ python/trunk/Doc/lib/libwinreg.tex Mon Jan 23 22:33:03 2006 @@ -387,8 +387,8 @@ objects will compare true if they both reference the same underlying Windows handle value. - Handle objects can be converted to an integer (eg, using the - builtin \function{int()} function, in which case the underlying + Handle objects can be converted to an integer (e.g., using the + builtin \function{int()} function), in which case the underlying Windows handle value is returned. You can also use the \method{Detach()} method to return the integer handle, and also disconnect the Windows handle from the handle object. 
From python-checkins at python.org Mon Jan 23 22:33:49 2006 From: python-checkins at python.org (georg.brandl) Date: Mon, 23 Jan 2006 22:33:49 +0100 (CET) Subject: [Python-checkins] r42162 - python/trunk/Doc/lib/libcodecs.tex Message-ID: <20060123213349.1C2261E4004@bag.python.org> Author: georg.brandl Date: Mon Jan 23 22:33:48 2006 New Revision: 42162 Modified: python/trunk/Doc/lib/libcodecs.tex Log: Add markup to new section in codecs docs Modified: python/trunk/Doc/lib/libcodecs.tex ============================================================================== --- python/trunk/Doc/lib/libcodecs.tex (original) +++ python/trunk/Doc/lib/libcodecs.tex Mon Jan 23 22:33:48 2006 @@ -525,9 +525,10 @@ \subsection{Encodings and Unicode\label{encodings-overview}} Unicode strings are stored internally as sequences of codepoints (to -be precise as Py_UNICODE arrays). Depending on the way Python is -compiled (either via --enable-unicode=ucs2 or --enable-unicode=ucs4, -with the former being the default) Py_UNICODE is either a 16-bit or +be precise as \ctype{Py_UNICODE} arrays). Depending on the way Python is +compiled (either via \longprogramopt{enable-unicode=ucs2} or +\longprogramopt{enable-unicode=ucs4}, with the former being the default) +\ctype{Py_UNICODE} is either a 16-bit or 32-bit data type. Once a Unicode object is used outside of CPU and memory, CPU endianness and how these arrays are stored as bytes become an issue. Transforming a unicode object into a sequence of bytes is @@ -535,20 +536,20 @@ bytes is known as decoding. There are many different methods how this transformation can be done (these methods are also called encodings). The simplest method is to map the codepoints 0-255 to the bytes -0x0-0xff. This means that a unicode object that contains codepoints -above U+00FF can't be encoded with this method (which is called -'latin-1' or 'iso-8859-1'). 
unicode.encode() will raise a -UnicodeEncodeError that looks like this: UnicodeEncodeError: 'latin-1' -codec can't encode character u'\u1234' in position 3: ordinal not in -range(256) +\code{0x0}-\code{0xff}. This means that a unicode object that contains +codepoints above \code{U+00FF} can't be encoded with this method (which +is called \code{'latin-1'} or \code{'iso-8859-1'}). unicode.encode() will +raise a UnicodeEncodeError that looks like this: \samp{UnicodeEncodeError: +'latin-1' codec can't encode character u'\e u1234' in position 3: ordinal +not in range(256)}. There's another group of encodings (the so called charmap encodings) that choose a different subset of all unicode code points and how -these codepoints are mapped to the bytes 0x0-0xff. To see how this is -done simply open e.g. encodings/cp1252.py (which is an encoding that -is used primarily on Windows). There's a string constant with 256 -characters that shows you which character is mapped to which byte -value. +these codepoints are mapped to the bytes \code{0x0}-\code{0xff.} +To see how this is done simply open e.g. \file{encodings/cp1252.py} +(which is an encoding that is used primarily on Windows). +There's a string constant with 256 characters that shows you which +character is mapped to which byte value. All of these encodings can only encode 256 of the 65536 (or 1114111) codepoints defined in unicode. A simple and straightforward way that @@ -562,20 +563,20 @@ by a CPU with a different endianness, then bytes have to be swapped though. To be able to detect the endianness of a UTF-16 byte sequence, there's the so called BOM (the "Byte Order Mark"). This is the Unicode -character U+FEFF. This character will be prepended to every UTF-16 -byte sequence. The byte swapped version of this character (0xFFFE) is +character \code{U+FEFF}. This character will be prepended to every UTF-16 +byte sequence. 
The byte swapped version of this character (\code{0xFFFE}) is an illegal character that may not appear in a Unicode text. So when -the first character in an UTF-16 byte sequence appears to be a U+FFFE +the first character in an UTF-16 byte sequence appears to be a \code{U+FFFE} the bytes have to be swapped on decoding. Unfortunately upto Unicode -4.0 the character U+FEFF had a second purpose as a "ZERO WIDTH -NO-BREAK SPACE": A character that has no width and doesn't allow a +4.0 the character \code{U+FEFF} had a second purpose as a \samp{ZERO WIDTH +NO-BREAK SPACE}: A character that has no width and doesn't allow a word to be split. It can e.g. be used to give hints to a ligature -algorithm. With Unicode 4.0 using U+FEFF as a ZERO WIDTH NO-BREAK -SPACE has been deprecated (with U+2060 (WORD JOINER) assuming this -role). Nevertheless Unicode software still must be able to handle -U+FEFF in both roles: As a BOM it's a device to determine the storage +algorithm. With Unicode 4.0 using \code{U+FEFF} as a \samp{ZERO WIDTH NO-BREAK +SPACE} has been deprecated (with \code{U+2060} (\samp{WORD JOINER}) assuming +this role). Nevertheless Unicode software still must be able to handle +\code{U+FEFF} in both roles: As a BOM it's a device to determine the storage layout of the encoded bytes, and vanishes once the byte sequence has -been decoded into a Unicode string; as a ZERO WIDTH NO-BREAK SPACE +been decoded into a Unicode string; as a \samp{ZERO WIDTH NO-BREAK SPACE} it's a normal character that will be decoded like any other. There's another encoding that is able to encoding the full range of @@ -588,20 +589,20 @@ character): \begin{tableii}{l|l}{textrm}{}{Range}{Encoding} -\lineii{U-00000000 ... U-0000007F}{0xxxxxxx} -\lineii{U-00000080 ... U-000007FF}{110xxxxx 10xxxxxx} -\lineii{U-00000800 ... U-0000FFFF}{1110xxxx 10xxxxxx 10xxxxxx} -\lineii{U-00010000 ... U-001FFFFF}{11110xxx 10xxxxxx 10xxxxxx 10xxxxxx} -\lineii{U-00200000 ... 
U-03FFFFFF}{111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx} -\lineii{U-04000000 ... U-7FFFFFFF}{1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx} +\lineii{\code{U-00000000} ... \code{U-0000007F}}{0xxxxxxx} +\lineii{\code{U-00000080} ... \code{U-000007FF}}{110xxxxx 10xxxxxx} +\lineii{\code{U-00000800} ... \code{U-0000FFFF}}{1110xxxx 10xxxxxx 10xxxxxx} +\lineii{\code{U-00010000} ... \code{U-001FFFFF}}{11110xxx 10xxxxxx 10xxxxxx 10xxxxxx} +\lineii{\code{U-00200000} ... \code{U-03FFFFFF}}{111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx} +\lineii{\code{U-04000000} ... \code{U-7FFFFFFF}}{1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx} \end{tableii} The least significant bit of the Unicode character is the rightmost x bit. -As UTF-8 is an 8bit encoding no BOM is required and any U+FEFF +As UTF-8 is an 8bit encoding no BOM is required and any \code{U+FEFF} character in the decoded Unicode string (even if it's the first -character) is treated as a ZERO WIDTH NO-BREAK SPACE. +character) is treated as a \samp{ZERO WIDTH NO-BREAK SPACE}. Without external information it's impossible to reliably determine which encoding was used for encoding a Unicode string. Each charmap @@ -609,14 +610,14 @@ possible with UTF-8, as UTF-8 byte sequences have a structure that doesn't allow arbitrary byte sequence. To increase the reliability with which a UTF-8 encoding can be detected, Microsoft invented a -variant of UTF-8 (that Python 2.5 calls "utf-8-sig") for its Notepad +variant of UTF-8 (that Python 2.5 calls \code{"utf-8-sig"}) for its Notepad program: Before any of the Unicode characters is written to the file, -a UTF-8 encoded BOM (which looks like this as a byte sequence: 0xef, -0xbb, 0xbf) is written. As it's rather improbably that any charmap -encoded file starts with these byte values (which would e.g. map to +a UTF-8 encoded BOM (which looks like this as a byte sequence: \code{0xef}, +\code{0xbb}, \code{0xbf}) is written. 
As it's rather improbably that any +charmap encoded file starts with these byte values (which would e.g. map to - LATIN SMALL LETTER I WITH DIAERESIS - RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + LATIN SMALL LETTER I WITH DIAERESIS \\ + RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK \\ INVERTED QUESTION MARK in iso-8859-1), this increases the probability that a utf-8-sig @@ -624,9 +625,9 @@ BOM is not used to be able to determine the byte order used for generating the byte sequence, but as a signature that helps in guessing the encoding. On encoding the utf-8-sig codec will write -0xef, 0xbb, 0xbf as the first three bytes to the file. On decoding -utf-8-sig will skip those three bytes if they appear as the first -three bytes in the file. +\code{0xef}, \code{0xbb}, \code{0xbf} as the first three bytes to the file. +On decoding utf-8-sig will skip those three bytes if they appear as the +first three bytes in the file. \subsection{Standard Encodings\label{standard-encodings}} From python-checkins at python.org Mon Jan 23 22:34:22 2006 From: python-checkins at python.org (georg.brandl) Date: Mon, 23 Jan 2006 22:34:22 +0100 (CET) Subject: [Python-checkins] r42163 - python/branches/release24-maint/Doc/lib/libcookie.tex python/branches/release24-maint/Doc/lib/libdl.tex python/branches/release24-maint/Doc/lib/libwinreg.tex Message-ID: <20060123213422.379F41E4004@bag.python.org> Author: georg.brandl Date: Mon Jan 23 22:34:21 2006 New Revision: 42163 Modified: python/branches/release24-maint/Doc/lib/libcookie.tex python/branches/release24-maint/Doc/lib/libdl.tex python/branches/release24-maint/Doc/lib/libwinreg.tex Log: Backport typo fixes. 
Modified: python/branches/release24-maint/Doc/lib/libcookie.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libcookie.tex (original) +++ python/branches/release24-maint/Doc/lib/libcookie.tex Mon Jan 23 22:34:21 2006 @@ -68,7 +68,7 @@ \begin{seealso} - \seemodule{cookielib}{HTTP cookie handling for for web + \seemodule{cookielib}{HTTP cookie handling for web \emph{clients}. The \module{cookielib} and \module{Cookie} modules do not depend on each other.} Modified: python/branches/release24-maint/Doc/lib/libdl.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libdl.tex (original) +++ python/branches/release24-maint/Doc/lib/libdl.tex Mon Jan 23 22:34:21 2006 @@ -27,7 +27,7 @@ (\constant{RTLD_NOW}). Default is \constant{RTLD_LAZY}. Note that some systems do not support \constant{RTLD_NOW}. -Return value is a \pytype{dlobject}. +Return value is a \class{dlobject}. \end{funcdesc} The \module{dl} module defines the following constants: Modified: python/branches/release24-maint/Doc/lib/libwinreg.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libwinreg.tex (original) +++ python/branches/release24-maint/Doc/lib/libwinreg.tex Mon Jan 23 22:34:21 2006 @@ -387,8 +387,8 @@ objects will compare true if they both reference the same underlying Windows handle value. - Handle objects can be converted to an integer (eg, using the - builtin \function{int()} function, in which case the underlying + Handle objects can be converted to an integer (e.g., using the + builtin \function{int()} function), in which case the underlying Windows handle value is returned. You can also use the \method{Detach()} method to return the integer handle, and also disconnect the Windows handle from the handle object. 
From python-checkins at python.org Mon Jan 23 23:00:18 2006 From: python-checkins at python.org (georg.brandl) Date: Mon, 23 Jan 2006 23:00:18 +0100 (CET) Subject: [Python-checkins] r42164 - python/trunk/Doc/lib/libdecimal.tex python/trunk/Doc/lib/libhashlib.tex python/trunk/Doc/lib/libpipes.tex python/trunk/Doc/lib/tkinter.tex Message-ID: <20060123220018.C2F371E4002@bag.python.org> Author: georg.brandl Date: Mon Jan 23 23:00:17 2006 New Revision: 42164 Modified: python/trunk/Doc/lib/libdecimal.tex python/trunk/Doc/lib/libhashlib.tex python/trunk/Doc/lib/libpipes.tex python/trunk/Doc/lib/tkinter.tex Log: Correct misspellings. Modified: python/trunk/Doc/lib/libdecimal.tex ============================================================================== --- python/trunk/Doc/lib/libdecimal.tex (original) +++ python/trunk/Doc/lib/libdecimal.tex Mon Jan 23 23:00:17 2006 @@ -1180,7 +1180,7 @@ a single recognizable canonical value? A. The \method{normalize()} method maps all equivalent values to a single -representive: +representative: \begin{verbatim} >>> values = map(Decimal, '200 200.000 2E2 .02E+4'.split()) Modified: python/trunk/Doc/lib/libhashlib.tex ============================================================================== --- python/trunk/Doc/lib/libhashlib.tex (original) +++ python/trunk/Doc/lib/libhashlib.tex Mon Jan 23 23:00:17 2006 @@ -15,7 +15,7 @@ message digest algorithms. Included are the FIPS secure hash algorithms SHA1, SHA224, SHA256, SHA384, and SHA512 (defined in FIPS 180-2) as well as RSA's MD5 algorithm (defined in Internet \rfc{1321}). -The terms secure hash and message digest are interchangable. Older +The terms secure hash and message digest are interchangeable. Older algorithms were called message digests. The modern term is secure hash. 
\warning{Some algorithms have known hash collision weaknesses, see the FAQ at the end.} Modified: python/trunk/Doc/lib/libpipes.tex ============================================================================== --- python/trunk/Doc/lib/libpipes.tex (original) +++ python/trunk/Doc/lib/libpipes.tex Mon Jan 23 23:00:17 2006 @@ -8,7 +8,7 @@ The \module{pipes} module defines a class to abstract the concept of -a \emph{pipeline} --- a sequence of convertors from one file to +a \emph{pipeline} --- a sequence of converters from one file to another. Because the module uses \program{/bin/sh} command lines, a \POSIX{} or Modified: python/trunk/Doc/lib/tkinter.tex ============================================================================== --- python/trunk/Doc/lib/tkinter.tex (original) +++ python/trunk/Doc/lib/tkinter.tex Mon Jan 23 23:00:17 2006 @@ -476,7 +476,7 @@ Note that the man pages list "STANDARD OPTIONS" and "WIDGET SPECIFIC OPTIONS" for each widget. The former is a list of options that are common to many widgets, the latter are the options that are -ideosyncratic to that particular widget. The Standard Options are +idiosyncratic to that particular widget. The Standard Options are documented on the \manpage{options}{3} man page. No distinction between standard and widget-specific options is made in From python-checkins at python.org Mon Jan 23 23:00:34 2006 From: python-checkins at python.org (georg.brandl) Date: Mon, 23 Jan 2006 23:00:34 +0100 (CET) Subject: [Python-checkins] r42165 - python/branches/release24-maint/Doc/lib/libdecimal.tex python/branches/release24-maint/Doc/lib/libpipes.tex python/branches/release24-maint/Doc/lib/tkinter.tex Message-ID: <20060123220034.50E841E4002@bag.python.org> Author: georg.brandl Date: Mon Jan 23 23:00:32 2006 New Revision: 42165 Modified: python/branches/release24-maint/Doc/lib/libdecimal.tex python/branches/release24-maint/Doc/lib/libpipes.tex python/branches/release24-maint/Doc/lib/tkinter.tex Log: Correct misspellings. 
Modified: python/branches/release24-maint/Doc/lib/libdecimal.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libdecimal.tex (original) +++ python/branches/release24-maint/Doc/lib/libdecimal.tex Mon Jan 23 23:00:32 2006 @@ -1180,7 +1180,7 @@ a single recognizable canonical value? A. The \method{normalize()} method maps all equivalent values to a single -representive: +representative: \begin{verbatim} >>> values = map(Decimal, '200 200.000 2E2 .02E+4'.split()) Modified: python/branches/release24-maint/Doc/lib/libpipes.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/libpipes.tex (original) +++ python/branches/release24-maint/Doc/lib/libpipes.tex Mon Jan 23 23:00:32 2006 @@ -8,7 +8,7 @@ The \module{pipes} module defines a class to abstract the concept of -a \emph{pipeline} --- a sequence of convertors from one file to +a \emph{pipeline} --- a sequence of converters from one file to another. Because the module uses \program{/bin/sh} command lines, a \POSIX{} or Modified: python/branches/release24-maint/Doc/lib/tkinter.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/tkinter.tex (original) +++ python/branches/release24-maint/Doc/lib/tkinter.tex Mon Jan 23 23:00:32 2006 @@ -476,7 +476,7 @@ Note that the man pages list "STANDARD OPTIONS" and "WIDGET SPECIFIC OPTIONS" for each widget. The former is a list of options that are common to many widgets, the latter are the options that are -ideosyncratic to that particular widget. The Standard Options are +idiosyncratic to that particular widget. The Standard Options are documented on the \manpage{options}{3} man page. 
No distinction between standard and widget-specific options is made in From python-checkins at python.org Mon Jan 23 23:19:25 2006 From: python-checkins at python.org (tim.peters) Date: Mon, 23 Jan 2006 23:19:25 +0100 (CET) Subject: [Python-checkins] r42166 - python/trunk/Lib/test/test_zipimport.py Message-ID: <20060123221925.A42D31E4002@bag.python.org> Author: tim.peters Date: Mon Jan 23 23:19:24 2006 New Revision: 42166 Modified: python/trunk/Lib/test/test_zipimport.py Log: Repaired new test failures on Windows: - The path separator isn't "/" on Windows. - Leaving behind a read-only file causes cascades of bogus failures on Windows. Modified: python/trunk/Lib/test/test_zipimport.py ============================================================================== --- python/trunk/Lib/test/test_zipimport.py (original) +++ python/trunk/Lib/test/test_zipimport.py Mon Jan 23 23:19:24 2006 @@ -30,6 +30,9 @@ pyc = imp.get_magic() + struct.pack(" Author: neal.norwitz Date: Tue Jan 24 07:06:11 2006 New Revision: 42167 Modified: python/trunk/Objects/cobject.c Log: Fix SF #1412837, compile failed with Watcom compiler Modified: python/trunk/Objects/cobject.c ============================================================================== --- python/trunk/Objects/cobject.c (original) +++ python/trunk/Objects/cobject.c Tue Jan 24 07:06:11 2006 @@ -100,16 +100,16 @@ } int -PyCObject_SetVoidPtr(PyObject *_self, void *cobj) +PyCObject_SetVoidPtr(PyObject *self, void *cobj) { - PyCObject* self = (PyCObject*)_self; - if (self == NULL || !PyCObject_Check(self) || - self->destructor != NULL) { + PyCObject* cself = (PyCObject*)self; + if (cself == NULL || !PyCObject_Check(cself) || + cself->destructor != NULL) { PyErr_SetString(PyExc_TypeError, "Invalid call to PyCObject_SetVoidPtr"); return 0; } - self->cobject = cobj; + cself->cobject = cobj; return 1; } From python-checkins at python.org Tue Jan 24 07:08:24 2006 From: python-checkins at python.org (neal.norwitz) Date: Tue, 24 Jan 2006 
07:08:24 +0100 (CET) Subject: [Python-checkins] r42168 - python/branches/release24-maint/Objects/cobject.c Message-ID: <20060124060824.B8E7D1E4002@bag.python.org> Author: neal.norwitz Date: Tue Jan 24 07:08:23 2006 New Revision: 42168 Modified: python/branches/release24-maint/Objects/cobject.c Log: Fix SF #1412837, compile failed with Watcom compiler Modified: python/branches/release24-maint/Objects/cobject.c ============================================================================== --- python/branches/release24-maint/Objects/cobject.c (original) +++ python/branches/release24-maint/Objects/cobject.c Tue Jan 24 07:08:23 2006 @@ -100,16 +100,16 @@ } int -PyCObject_SetVoidPtr(PyObject *_self, void *cobj) +PyCObject_SetVoidPtr(PyObject *self, void *cobj) { - PyCObject* self = (PyCObject*)_self; - if (self == NULL || !PyCObject_Check(self) || - self->destructor != NULL) { + PyCObject* cself = (PyCObject*)self; + if (cself == NULL || !PyCObject_Check(cself) || + cself->destructor != NULL) { PyErr_SetString(PyExc_TypeError, "Invalid call to PyCObject_SetVoidPtr"); return 0; } - self->cobject = cobj; + cself->cobject = cobj; return 1; } From python-checkins at python.org Tue Jan 24 10:46:49 2006 From: python-checkins at python.org (gregory.p.smith) Date: Tue, 24 Jan 2006 10:46:49 +0100 (CET) Subject: [Python-checkins] r42169 - in python/trunk: Modules/_bsddb.c setup.py Message-ID: <20060124094649.C02991E4002@bag.python.org> Author: gregory.p.smith Date: Tue Jan 24 10:46:48 2006 New Revision: 42169 Modified: python/trunk/Modules/_bsddb.c python/trunk/setup.py Log: Support for BerkeleyDB 4.4 (tested against 4.4.20 as well as all the way back thru 3.2). This should be backported to the release24-maint branch. Modified: python/trunk/Modules/_bsddb.c ============================================================================== --- python/trunk/Modules/_bsddb.c (original) +++ python/trunk/Modules/_bsddb.c Tue Jan 24 10:46:48 2006 @@ -97,7 +97,7 @@ #error "eek! 
DBVER can't handle minor versions > 9" #endif -#define PY_BSDDB_VERSION "4.3.3" +#define PY_BSDDB_VERSION "4.4.0" static char *rcs_id = "$Id$"; @@ -4308,8 +4308,13 @@ #endif MAKE_ENTRY(nrequests); MAKE_ENTRY(nreleases); - MAKE_ENTRY(nnowaits); +#if (DBVER < 44) + MAKE_ENTRY(nnowaits); /* these were renamed in 4.4 */ MAKE_ENTRY(nconflicts); +#else + MAKE_ENTRY(lock_nowait); + MAKE_ENTRY(lock_wait); +#endif MAKE_ENTRY(ndeadlocks); MAKE_ENTRY(regsize); MAKE_ENTRY(region_wait); @@ -5153,7 +5158,11 @@ ADD_INT(d, DB_LOCK_IREAD); ADD_INT(d, DB_LOCK_IWR); #if (DBVER >= 33) +#if (DBVER < 44) ADD_INT(d, DB_LOCK_DIRTY); +#else + ADD_INT(d, DB_LOCK_READ_UNCOMMITTED); /* renamed in 4.4 */ +#endif ADD_INT(d, DB_LOCK_WWRITE); #endif @@ -5255,6 +5264,11 @@ ADD_INT(d, DB_MULTIPLE_KEY); #endif +#if (DBVER >= 44) + ADD_INT(d, DB_READ_UNCOMMITTED); /* replaces DB_DIRTY_READ in 4.4 */ + ADD_INT(d, DB_READ_COMMITTED); +#endif + #if (DBVER >= 33) ADD_INT(d, DB_DONOTINDEX); #endif Modified: python/trunk/setup.py ============================================================================== --- python/trunk/setup.py (original) +++ python/trunk/setup.py Tue Jan 24 10:46:48 2006 @@ -556,12 +556,12 @@ # Sleepycat Berkeley DB interface. http://www.sleepycat.com # # This requires the Sleepycat DB code. The earliest supported version - # of that library is 3.2, the latest supported version is 4.3. A list + # of that library is 3.2, the latest supported version is 4.4. A list # of available releases can be found at # # http://www.sleepycat.com/update/index.html - max_db_ver = (4, 3) + max_db_ver = (4, 4) min_db_ver = (3, 2) db_setup_debug = False # verbose debug prints from this script? 
@@ -578,18 +578,20 @@ '/sw/include/db3', ] # 4.x minor number specific paths - for x in (0,1,2,3): + for x in (0,1,2,3,4): db_inc_paths.append('/usr/include/db4%d' % x) db_inc_paths.append('/usr/include/db4.%d' % x) db_inc_paths.append('/usr/local/BerkeleyDB.4.%d/include' % x) db_inc_paths.append('/usr/local/include/db4%d' % x) db_inc_paths.append('/pkg/db-4.%d/include' % x) + db_inc_paths.append('/opt/db-4.%d/include' % x) # 3.x minor number specific paths for x in (2,3): db_inc_paths.append('/usr/include/db3%d' % x) db_inc_paths.append('/usr/local/BerkeleyDB.3.%d/include' % x) db_inc_paths.append('/usr/local/include/db3%d' % x) db_inc_paths.append('/pkg/db-3.%d/include' % x) + db_inc_paths.append('/opt/db-3.%d/include' % x) db_ver_inc_map = {} From python-checkins at python.org Tue Jan 24 11:02:03 2006 From: python-checkins at python.org (gregory.p.smith) Date: Tue, 24 Jan 2006 11:02:03 +0100 (CET) Subject: [Python-checkins] r42170 - in python/branches/release24-maint: Modules/_bsddb.c setup.py Message-ID: <20060124100203.63D0C1E400B@bag.python.org> Author: gregory.p.smith Date: Tue Jan 24 11:02:02 2006 New Revision: 42170 Modified: python/branches/release24-maint/Modules/_bsddb.c python/branches/release24-maint/setup.py Log: backport of commit 42169 adds support for compiling against BerkeleyDB 4.4.x Modified: python/branches/release24-maint/Modules/_bsddb.c ============================================================================== --- python/branches/release24-maint/Modules/_bsddb.c (original) +++ python/branches/release24-maint/Modules/_bsddb.c Tue Jan 24 11:02:02 2006 @@ -97,7 +97,7 @@ #error "eek! 
DBVER can't handle minor versions > 9" #endif -#define PY_BSDDB_VERSION "4.3.0" +#define PY_BSDDB_VERSION "4.3.0.1" static char *rcs_id = "$Id$"; @@ -4099,8 +4099,13 @@ #endif MAKE_ENTRY(nrequests); MAKE_ENTRY(nreleases); - MAKE_ENTRY(nnowaits); +#if (DBVER < 44) + MAKE_ENTRY(nnowaits); /* these were renamed in 4.4 */ MAKE_ENTRY(nconflicts); +#else + MAKE_ENTRY(lock_nowait); + MAKE_ENTRY(lock_wait); +#endif MAKE_ENTRY(ndeadlocks); MAKE_ENTRY(regsize); MAKE_ENTRY(region_wait); @@ -4936,7 +4941,11 @@ ADD_INT(d, DB_LOCK_IREAD); ADD_INT(d, DB_LOCK_IWR); #if (DBVER >= 33) +#if (DBVER < 44) ADD_INT(d, DB_LOCK_DIRTY); +#else + ADD_INT(d, DB_LOCK_READ_UNCOMMITTED); /* renamed in 4.4 */ +#endif ADD_INT(d, DB_LOCK_WWRITE); #endif @@ -5038,6 +5047,11 @@ ADD_INT(d, DB_MULTIPLE_KEY); #endif +#if (DBVER >= 44) + ADD_INT(d, DB_READ_UNCOMMITTED); /* replaces DB_DIRTY_READ in 4.4 */ + ADD_INT(d, DB_READ_COMMITTED); +#endif + #if (DBVER >= 33) ADD_INT(d, DB_DONOTINDEX); #endif Modified: python/branches/release24-maint/setup.py ============================================================================== --- python/branches/release24-maint/setup.py (original) +++ python/branches/release24-maint/setup.py Tue Jan 24 11:02:02 2006 @@ -485,12 +485,12 @@ # Sleepycat Berkeley DB interface. http://www.sleepycat.com # # This requires the Sleepycat DB code. The earliest supported version - # of that library is 3.2, the latest supported version is 4.3. A list + # of that library is 3.2, the latest supported version is 4.4. A list # of available releases can be found at # # http://www.sleepycat.com/update/index.html - max_db_ver = (4, 3) + max_db_ver = (4, 4) min_db_ver = (3, 2) db_setup_debug = False # verbose debug prints from this script? 
@@ -507,17 +507,19 @@ '/sw/include/db3', ] # 4.x minor number specific paths - for x in (0,1,2,3): + for x in (0,1,2,3,4): db_inc_paths.append('/usr/include/db4%d' % x) db_inc_paths.append('/usr/local/BerkeleyDB.4.%d/include' % x) db_inc_paths.append('/usr/local/include/db4%d' % x) db_inc_paths.append('/pkg/db-4.%d/include' % x) + db_inc_paths.append('/opt/db-4.%d/include' % x) # 3.x minor number specific paths for x in (2,3): db_inc_paths.append('/usr/include/db3%d' % x) db_inc_paths.append('/usr/local/BerkeleyDB.3.%d/include' % x) db_inc_paths.append('/usr/local/include/db3%d' % x) db_inc_paths.append('/pkg/db-3.%d/include' % x) + db_inc_paths.append('/opt/db-3.%d/include' % x) db_ver_inc_map = {} From python-checkins at python.org Tue Jan 24 16:51:23 2006 From: python-checkins at python.org (martin.v.loewis) Date: Tue, 24 Jan 2006 16:51:23 +0100 (CET) Subject: [Python-checkins] r42171 - in python/trunk: Lib/urllib.py Misc/ACKS Misc/NEWS Message-ID: <20060124155123.3B25A1E4085@bag.python.org> Author: martin.v.loewis Date: Tue Jan 24 16:51:21 2006 New Revision: 42171 Modified: python/trunk/Lib/urllib.py python/trunk/Misc/ACKS python/trunk/Misc/NEWS Log: Patch #1349118: urllib2 now supports user:pass@ style proxy specifications, raises IOErrors when proxies for unsupported protocols are defined, and uses the https proxy on https redirections. 
Modified: python/trunk/Lib/urllib.py ============================================================================== --- python/trunk/Lib/urllib.py (original) +++ python/trunk/Lib/urllib.py Tue Jan 24 16:51:21 2006 @@ -37,7 +37,7 @@ "splitnport", "splitquery", "splitattr", "splitvalue", "splitgophertype", "getproxies"] -__version__ = '1.16' # XXX This version is not always updated :-( +__version__ = '1.17' # XXX This version is not always updated :-( MAXFTPCACHE = 10 # Trim the ftp cache beyond this size @@ -271,6 +271,7 @@ """Use HTTP protocol.""" import httplib user_passwd = None + proxy_passwd= None if isinstance(url, str): host, selector = splithost(url) if host: @@ -279,6 +280,9 @@ realhost = host else: host, selector = url + # check whether the proxy contains authorization information + proxy_passwd, host = splituser(host) + # now we proceed with the url we want to obtain urltype, rest = splittype(selector) url = rest user_passwd = None @@ -295,6 +299,13 @@ #print "proxy via http:", host, selector if not host: raise IOError, ('http error', 'no host given') + + if proxy_passwd: + import base64 + proxy_auth = base64.encodestring(proxy_passwd).strip() + else: + proxy_auth = None + if user_passwd: import base64 auth = base64.encodestring(user_passwd).strip() @@ -307,6 +318,7 @@ h.putheader('Content-length', '%d' % len(data)) else: h.putrequest('GET', selector) + if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) if auth: h.putheader('Authorization', 'Basic %s' % auth) if realhost: h.putheader('Host', realhost) for args in self.addheaders: h.putheader(*args) @@ -349,6 +361,7 @@ """Use HTTPS protocol.""" import httplib user_passwd = None + proxy_passwd = None if isinstance(url, str): host, selector = splithost(url) if host: @@ -357,6 +370,8 @@ realhost = host else: host, selector = url + # here, we determine, whether the proxy contains authorization information + proxy_passwd, host = splituser(host) urltype, rest = splittype(selector) url = 
rest user_passwd = None @@ -370,6 +385,11 @@ selector = "%s://%s%s" % (urltype, realhost, rest) #print "proxy via https:", host, selector if not host: raise IOError, ('https error', 'no host given') + if proxy_passwd: + import base64 + proxy_auth = base64.encodestring(proxy_passwd).strip() + else: + proxy_auth = None if user_passwd: import base64 auth = base64.encodestring(user_passwd).strip() @@ -385,7 +405,8 @@ h.putheader('Content-length', '%d' % len(data)) else: h.putrequest('GET', selector) - if auth: h.putheader('Authorization', 'Basic %s' % auth) + if proxy_auth: h.putheader('Proxy-Authorization: Basic %s' % proxy_auth) + if auth: h.putheader('Authorization: Basic %s' % auth) if realhost: h.putheader('Host', realhost) for args in self.addheaders: h.putheader(*args) h.endheaders() @@ -404,6 +425,8 @@ def open_gopher(self, url): """Use Gopher protocol.""" + if not isinstance(url, str): + raise IOError, ('gopher error', 'proxy support for gopher protocol currently not implemented') import gopherlib host, selector = splithost(url) if not host: raise IOError, ('gopher error', 'no host given') @@ -419,6 +442,8 @@ return addinfourl(fp, noheaders(), "gopher:" + url) def open_file(self, url): + if not isinstance(url, str): + raise IOError, ('file error', 'proxy support for file protocol currently not implemented') """Use local file or FTP depending on form of URL.""" if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': return self.open_ftp(url) @@ -462,6 +487,8 @@ def open_ftp(self, url): """Use FTP protocol.""" + if not isinstance(url, str): + raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented') import mimetypes, mimetools try: from cStringIO import StringIO @@ -522,6 +549,8 @@ def open_data(self, url, data=None): """Use "data" URL.""" + if not isinstance(url, str): + raise IOError, ('data error', 'proxy support for data protocol currently not implemented') # ignore POSTed data # # syntax of data URLs: @@ 
-624,8 +653,7 @@ def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): """Error 401 -- authentication required. - See this URL for a description of the basic authentication scheme: - http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt""" + This function supports Basic authentication only.""" if not 'www-authenticate' in headers: URLopener.http_error_default(self, url, fp, errcode, errmsg, headers) @@ -644,7 +672,63 @@ return getattr(self,name)(url, realm) else: return getattr(self,name)(url, realm, data) + + def http_error_407(self, url, fp, errcode, errmsg, headers, data=None): + """Error 407 -- proxy authentication required. + This function supports Basic authentication only.""" + if not 'proxy-authenticate' in headers: + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + stuff = headers['proxy-authenticate'] + import re + match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) + if not match: + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + scheme, realm = match.groups() + if scheme.lower() != 'basic': + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + name = 'retry_proxy_' + self.type + '_basic_auth' + if data is None: + return getattr(self,name)(url, realm) + else: + return getattr(self,name)(url, realm, data) + + def retry_proxy_http_basic_auth(self, url, realm, data=None): + host, selector = splithost(url) + newurl = 'http://' + host + selector + proxy = self.proxies['http'] + urltype, proxyhost = splittype(proxy) + proxyhost, proxyselector = splithost(proxyhost) + i = proxyhost.find('@') + 1 + proxyhost = proxyhost[i:] + user, passwd = self.get_user_passwd(proxyhost, realm, i) + if not (user or passwd): return None + proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost + self.proxies['http'] = 'http://' + proxyhost + proxyselector + if data is None: + return self.open(newurl) + else: + return self.open(newurl, 
data) + def retry_proxy_https_basic_auth(self, url, realm, data=None): + host, selector = splithost(url) + newurl = 'https://' + host + selector + proxy = self.proxies['https'] + urltype, proxyhost = splittype(proxy) + proxyhost, proxyselector = splithost(proxyhost) + i = proxyhost.find('@') + 1 + proxyhost = proxyhost[i:] + user, passwd = self.get_user_passwd(proxyhost, realm, i) + if not (user or passwd): return None + proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost + self.proxies['https'] = 'https://' + proxyhost + proxyselector + if data is None: + return self.open(newurl) + else: + return self.open(newurl, data) + def retry_http_basic_auth(self, url, realm, data=None): host, selector = splithost(url) i = host.find('@') + 1 @@ -665,8 +749,11 @@ user, passwd = self.get_user_passwd(host, realm, i) if not (user or passwd): return None host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host - newurl = '//' + host + selector - return self.open_https(newurl, data) + newurl = 'https://' + host + selector + if data is None: + return self.open(newurl) + else: + return self.open(newurl, data) def get_user_passwd(self, host, realm, clear_cache = 0): key = realm + '@' + host.lower() Modified: python/trunk/Misc/ACKS ============================================================================== --- python/trunk/Misc/ACKS (original) +++ python/trunk/Misc/ACKS Tue Jan 24 16:51:21 2006 @@ -427,6 +427,7 @@ Chad Netzer Max Neunhöffer George Neville-Neil +Johannes Nicolai Samuel Nicolary Gustavo Niemeyer Oscar Nierstrasz Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Tue Jan 24 16:51:21 2006 @@ -337,6 +337,10 @@ Library ------- +- Patch #1349118: urllib2 now supports user:pass@ style proxy + specifications, raises IOErrors when proxies for unsupported protocols + are defined, and uses the https proxy on 
https redirections. + - Bug #902075: urllib2 now supports 'host:port' style proxy specifications. - Bug #1407902: Add support for sftp:// URIs to urlparse. From python-checkins at python.org Tue Jan 24 17:16:20 2006 From: python-checkins at python.org (fredrik.lundh) Date: Tue, 24 Jan 2006 17:16:20 +0100 (CET) Subject: [Python-checkins] r42172 - python/trunk/Doc/tools/listmodules.py Message-ID: <20060124161620.458D91E4002@bag.python.org> Author: fredrik.lundh Date: Tue Jan 24 17:16:19 2006 New Revision: 42172 Added: python/trunk/Doc/tools/listmodules.py (contents, props changed) Log: module list utility Added: python/trunk/Doc/tools/listmodules.py ============================================================================== --- (empty file) +++ python/trunk/Doc/tools/listmodules.py Tue Jan 24 17:16:19 2006 @@ -0,0 +1,126 @@ +# $Id$ +# +# Locate all standard modules available in this build. +# +# This script is designed to run on Python 1.5.2 and newer. +# +# Written by Fredrik Lundh, January 2005 +# + +import imp, sys, os, re, time + +identifier = "python-%s-%s" % (sys.version[:3], sys.platform) +timestamp = time.strftime("%Y%m%dT%H%M%SZ", time.gmtime(time.time())) + +# known test packages +TEST_PACKAGES = "test.", "bsddb.test.", "distutils.tests." 
+ +try: + import platform + platform = platform.platform() +except: + platform = None # unknown + +suffixes = imp.get_suffixes() + +def get_suffix(file): + for suffix in suffixes: + if file[-len(suffix[0]):] == suffix[0]: + return suffix + return None + +def main(): + + path = getpath() + + modules = {} + for m in sys.builtin_module_names: + modules[m] = None + + for p in path: + modules.update(getmodules(p)) + + keys = modules.keys() + keys.sort() + + # filter out known test packages + def cb(m): + for d in TEST_PACKAGES: + if m[:len(d)] == d: + return 0 + return 1 + keys = filter(cb, keys) + + try: + outfile = sys.argv[1] + if outfile == "-": + outfile = None + elif outfile == "-f": + outfile = "modules-" + identifier + ".txt" + except IndexError: + outfile = None + + if not outfile: + out = sys.stdout + else: + out = open(outfile, "w") + + out.write("# module list (generated by listmodules.py)\n") + out.write("#\n") + out.write("# timestamp=%s\n" % repr(timestamp)) + out.write("# sys.version=%s\n" % repr(sys.version)) + out.write("# sys.platform=%s\n" % repr(sys.platform)) + if platform: + out.write("# platform=%s\n" % repr(platform)) + out.write("#\n") + + for k in keys: + out.write(k + "\n") + + if out is not sys.stdout: + out.close() + print out.name, "ok (%d modules)" % len(modules) + +def getmodules(p): + # get modules in a given directory + modules = {} + for f in os.listdir(p): + f = os.path.join(p, f) + if os.path.isfile(f): + m, e = os.path.splitext(f) + suffix = get_suffix(f) + if not suffix: + continue + m = os.path.basename(m) + if re.compile("(?i)[a-z_]\w*$").match(m): + if suffix[2] == imp.C_EXTENSION: + # check that this extension can be imported + try: + __import__(m) + except ImportError: + continue + modules[m] = f + elif os.path.isdir(f): + m = os.path.basename(f) + if os.path.isfile(os.path.join(f, "__init__.py")): + for mm, f in getmodules(f).items(): + modules[m + "." 
+ mm] = f + return modules + +def getpath(): + path = map(os.path.normcase, map(os.path.abspath, sys.path[:])) + # get rid of site packages + for p in path: + if p[-13:] == "site-packages": + def cb(p, site_package_path=os.path.abspath(p)): + return p[:len(site_package_path)] != site_package_path + path = filter(cb, path) + break + # get rid of non-existent directories and the current directory + def cb(p, cwd=os.path.normcase(os.getcwd())): + return os.path.isdir(p) and p != cwd + path = filter(cb, path) + return path + +if __name__ == "__main__": + main() From mwh at python.net Tue Jan 24 17:29:11 2006 From: mwh at python.net (Michael Hudson) Date: Tue, 24 Jan 2006 16:29:11 +0000 Subject: [Python-checkins] r42149 - python/trunk/Mac/Modules/file/_Filemodule.c In-Reply-To: <20060123072530.705231E4039@bag.python.org> (neal norwitz's message of "Mon, 23 Jan 2006 08:25:30 +0100 (CET)") References: <20060123072530.705231E4039@bag.python.org> Message-ID: <2my815lhd4.fsf@starship.python.net> "neal.norwitz" writes: > Author: neal.norwitz > Date: Mon Jan 23 08:25:29 2006 > New Revision: 42149 > > Modified: > python/trunk/Mac/Modules/file/_Filemodule.c > Log: > This should fix a mem leak on the Mac. Brett tested it. > > Modified: python/trunk/Mac/Modules/file/_Filemodule.c > ============================================================================== > --- python/trunk/Mac/Modules/file/_Filemodule.c (original) > +++ python/trunk/Mac/Modules/file/_Filemodule.c Mon Jan 23 08:25:29 2006 > @@ -3204,11 +3204,10 @@ > char *path = NULL; > if (!PyArg_Parse(v, "et", Py_FileSystemDefaultEncoding, &path)) > return 0; > - if ( (err=FSPathMakeRef(path, fsr, NULL)) ) { > + if ( (err=FSPathMakeRef(path, fsr, NULL)) ) > PyMac_Error(err); > - return 0; > - } > - return 1; > + PyMem_Free(path); > + return !err; > } > /* XXXX Should try unicode here too */ > /* Otherwise we try to go via an FSSpec */ Isn't this file autogenerated? 
I think you probably want to make the same change in filesupport.py. Cheers, mwh -- 31. Simplicity does not precede complexity, but follows it. -- Alan Perlis, http://www.cs.yale.edu/homes/perlis-alan/quotes.html From nnorwitz at gmail.com Tue Jan 24 19:40:58 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Tue, 24 Jan 2006 10:40:58 -0800 Subject: [Python-checkins] r42149 - python/trunk/Mac/Modules/file/_Filemodule.c In-Reply-To: <2my815lhd4.fsf@starship.python.net> References: <20060123072530.705231E4039@bag.python.org> <2my815lhd4.fsf@starship.python.net> Message-ID: On 1/24/06, Michael Hudson wrote: > > > > Modified: > > python/trunk/Mac/Modules/file/_Filemodule.c > > Isn't this file autogenerated? I think you probably want to make the > same change in filesupport.py. Damn, I had no idea it was generated. I'll try to fix it tonight and add a comment at the top even though I probably would have missed the comment too. Thanks, n From python-checkins at python.org Tue Jan 24 20:55:04 2006 From: python-checkins at python.org (gregory.p.smith) Date: Tue, 24 Jan 2006 20:55:04 +0100 (CET) Subject: [Python-checkins] r42173 - python/trunk/Modules/_bsddb.c Message-ID: <20060124195504.86E3F1E4002@bag.python.org> Author: gregory.p.smith Date: Tue Jan 24 20:55:02 2006 New Revision: 42173 Modified: python/trunk/Modules/_bsddb.c Log: commits sourceforge patch #1407992 by neil.norwitz. this fixes the bsddb db associate tests when compiled against BerkeleyDB 3.3 thru 4.1. 4.2 thru 4.4 already passed and still pass. Modified: python/trunk/Modules/_bsddb.c ============================================================================== --- python/trunk/Modules/_bsddb.c (original) +++ python/trunk/Modules/_bsddb.c Tue Jan 24 20:55:02 2006 @@ -97,7 +97,7 @@ #error "eek! 
DBVER can't handle minor versions > 9" #endif -#define PY_BSDDB_VERSION "4.4.0" +#define PY_BSDDB_VERSION "4.4.1" static char *rcs_id = "$Id$"; @@ -912,7 +912,7 @@ } #endif - if (self->db_env) { + if (self->db_env && !self->closed) { MYDB_BEGIN_ALLOW_THREADS; self->db_env->close(self->db_env, 0); MYDB_END_ALLOW_THREADS; @@ -1534,11 +1534,11 @@ keyObj = PyInt_FromLong(*(int *)key.data); else keyObj = PyString_FromStringAndSize(key.data, key.size); - retval = Py_BuildValue("OOO", keyObj, pkeyObj, dataObj); + retval = PyTuple_Pack(3, keyObj, pkeyObj, dataObj); } else /* return just the pkey and data */ { - retval = Py_BuildValue("OO", pkeyObj, dataObj); + retval = PyTuple_Pack(2, pkeyObj, dataObj); } FREE_DBT(pkey); FREE_DBT(data); @@ -3176,7 +3176,7 @@ else pkeyObj = PyString_FromStringAndSize(pkey.data, pkey.size); - if (flags & DB_SET_RECNO) /* return key, pkey and data */ + if (key.data && key.size) /* return key, pkey and data */ { PyObject *keyObj; int type = _DB_get_type(self->mydb); @@ -3184,12 +3184,12 @@ keyObj = PyInt_FromLong(*(int *)key.data); else keyObj = PyString_FromStringAndSize(key.data, key.size); - retval = Py_BuildValue("OOO", keyObj, pkeyObj, dataObj); + retval = PyTuple_Pack(3, keyObj, pkeyObj, dataObj); FREE_DBT(key); } else /* return just the pkey and data */ { - retval = Py_BuildValue("OO", pkeyObj, dataObj); + retval = PyTuple_Pack(2, pkeyObj, dataObj); } FREE_DBT(pkey); FREE_DBT(data); From python-checkins at python.org Tue Jan 24 21:09:46 2006 From: python-checkins at python.org (gregory.p.smith) Date: Tue, 24 Jan 2006 21:09:46 +0100 (CET) Subject: [Python-checkins] r42174 - python/trunk/Misc/NEWS Message-ID: <20060124200946.BBBCE1E4002@bag.python.org> Author: gregory.p.smith Date: Tue Jan 24 21:09:45 2006 New Revision: 42174 Modified: python/trunk/Misc/NEWS Log: note the bsddb extension module changes. 
Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Tue Jan 24 21:09:45 2006 @@ -243,7 +243,12 @@ - Patch #1338314, Bug #1336623: fix tarfile so it can extract REGTYPE directories from tarfiles written by old programs. -- Get bsddb module to build with BSD DB version 3.2 +- Patch #1407992, fixes broken bsddb module db associate when using + BerkeleyDB 3.3, 4.0 or 4.1. + +- Get bsddb module to build with BerkeleyDB version 4.4 + +- Get bsddb module to build with BerkeleyDB version 3.2 - Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, but Python incorrectly assumes it is in UTF-8 format From python-checkins at python.org Tue Jan 24 23:44:09 2006 From: python-checkins at python.org (tim.peters) Date: Tue, 24 Jan 2006 23:44:09 +0100 (CET) Subject: [Python-checkins] r42175 - python/trunk/Lib/urllib.py Message-ID: <20060124224409.8C6911E4002@bag.python.org> Author: tim.peters Date: Tue Jan 24 23:44:08 2006 New Revision: 42175 Modified: python/trunk/Lib/urllib.py Log: Whitespace normalization. Modified: python/trunk/Lib/urllib.py ============================================================================== --- python/trunk/Lib/urllib.py (original) +++ python/trunk/Lib/urllib.py Tue Jan 24 23:44:08 2006 @@ -299,7 +299,7 @@ #print "proxy via http:", host, selector if not host: raise IOError, ('http error', 'no host given') - + if proxy_passwd: import base64 proxy_auth = base64.encodestring(proxy_passwd).strip() @@ -672,7 +672,7 @@ return getattr(self,name)(url, realm) else: return getattr(self,name)(url, realm, data) - + def http_error_407(self, url, fp, errcode, errmsg, headers, data=None): """Error 407 -- proxy authentication required. 
This function supports Basic authentication only.""" @@ -694,7 +694,7 @@ return getattr(self,name)(url, realm) else: return getattr(self,name)(url, realm, data) - + def retry_proxy_http_basic_auth(self, url, realm, data=None): host, selector = splithost(url) newurl = 'http://' + host + selector @@ -728,7 +728,7 @@ return self.open(newurl) else: return self.open(newurl, data) - + def retry_http_basic_auth(self, url, realm, data=None): host, selector = splithost(url) i = host.find('@') + 1 From python-checkins at python.org Tue Jan 24 23:44:54 2006 From: python-checkins at python.org (tim.peters) Date: Tue, 24 Jan 2006 23:44:54 +0100 (CET) Subject: [Python-checkins] r42176 - python/trunk/Lib/test/test_socket_ssl.py Message-ID: <20060124224454.79BB81E4002@bag.python.org> Author: tim.peters Date: Tue Jan 24 23:44:54 2006 New Revision: 42176 Modified: python/trunk/Lib/test/test_socket_ssl.py Log: test_rude_shutdown(): Rewrote to use proper thread synchronization and termination. Modified: python/trunk/Lib/test/test_socket_ssl.py ============================================================================== --- python/trunk/Lib/test/test_socket_ssl.py (original) +++ python/trunk/Lib/test/test_socket_ssl.py Tue Jan 24 23:44:54 2006 @@ -2,7 +2,6 @@ from test import test_support import socket -import time # Optionally test SSL support. This requires the 'network' resource as given # on the regrtest command line. @@ -29,34 +28,43 @@ def test_rude_shutdown(): try: - import thread + import threading except ImportError: return - # some random port to connect to + # Some random port to connect to. PORT = 9934 + + listener_gone = threading.Event() + + # `listener` runs in a thread. It opens a socket listening on PORT, and + # sits in an accept() until the main thread connects. Then it rudely + # closes the socket, and sets Event `listener_gone` to let the main thread + # know the socket is gone. 
def listener(): - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s = socket.socket() s.bind(('', PORT)) s.listen(5) s.accept() - del s - thread.exit() + s = None # reclaim the socket object, which also closes it + listener_gone.set() def connector(): - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s = socket.socket() s.connect(('localhost', PORT)) + listener_gone.wait() try: ssl_sock = socket.ssl(s) except socket.sslerror: pass else: - raise test_support.TestFailed, \ - 'connecting to closed SSL socket failed' + raise test_support.TestFailed( + 'connecting to closed SSL socket should have failed') - thread.start_new_thread(listener, ()) - time.sleep(1) + t = threading.Thread(target=listener) + t.start() connector() + t.join() def test_main(): if not hasattr(socket, "ssl"): From python-checkins at python.org Wed Jan 25 06:21:58 2006 From: python-checkins at python.org (neal.norwitz) Date: Wed, 25 Jan 2006 06:21:58 +0100 (CET) Subject: [Python-checkins] r42177 - in python/trunk: Lib/bsddb/test/test_1413192.py Lib/bsddb/test/test_all.py Lib/test/test_bsddb3.py Misc/NEWS Modules/_bsddb.c Message-ID: <20060125052158.19C481E4002@bag.python.org> Author: neal.norwitz Date: Wed Jan 25 06:21:55 2006 New Revision: 42177 Added: python/trunk/Lib/bsddb/test/test_1413192.py Modified: python/trunk/Lib/bsddb/test/test_all.py python/trunk/Lib/test/test_bsddb3.py python/trunk/Misc/NEWS python/trunk/Modules/_bsddb.c Log: Fix bug #1413192, fix seg fault in bsddb if a txn was deleted before the env. Will backport. Added: python/trunk/Lib/bsddb/test/test_1413192.py ============================================================================== --- (empty file) +++ python/trunk/Lib/bsddb/test/test_1413192.py Wed Jan 25 06:21:55 2006 @@ -0,0 +1,16 @@ + +# http://python.org/sf/1413192 +# +# This test relies on the variable names, see the bug report for details. +# The problem was that the env was deallocated prior to the txn. + +from bsddb import db + +env_name = '.' 
+ +env = db.DBEnv() +env.open(env_name, db.DB_CREATE | db.DB_INIT_TXN) +the_txn = env.txn_begin() + +map = db.DB(env) +map.open('xxx.db', "p", db.DB_HASH, db.DB_CREATE, 0666, txn=the_txn) Modified: python/trunk/Lib/bsddb/test/test_all.py ============================================================================== --- python/trunk/Lib/bsddb/test/test_all.py (original) +++ python/trunk/Lib/bsddb/test/test_all.py Wed Jan 25 06:21:55 2006 @@ -46,6 +46,12 @@ def suite(): + try: + # this is special, it used to segfault the interpreter + import test_1413192 + except: + pass + test_modules = [ 'test_associate', 'test_basics', Modified: python/trunk/Lib/test/test_bsddb3.py ============================================================================== --- python/trunk/Lib/test/test_bsddb3.py (original) +++ python/trunk/Lib/test/test_bsddb3.py Wed Jan 25 06:21:55 2006 @@ -22,6 +22,12 @@ def suite(): + try: + # this is special, it used to segfault the interpreter + import bsddb.test.test_1413192 + except: + pass + test_modules = [ 'test_associate', 'test_basics', Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Wed Jan 25 06:21:55 2006 @@ -216,6 +216,9 @@ Extension Modules ----------------- +- Bug #1413192, fix seg fault in bsddb if a transaction was deleted + before the env. + - Patch #1103116: Basic AF_NETLINK support. - Bug #1402308, (possible) segfault when using mmap.mmap(-1, ...) 
Modified: python/trunk/Modules/_bsddb.c ============================================================================== --- python/trunk/Modules/_bsddb.c (original) +++ python/trunk/Modules/_bsddb.c Wed Jan 25 06:21:55 2006 @@ -266,6 +266,7 @@ typedef struct { PyObject_HEAD DB_TXN* txn; + PyObject *env; #ifdef HAVE_WEAKREF PyObject *in_weakreflist; /* List of weak references */ #endif @@ -928,6 +929,8 @@ DBTxnObject* self = PyObject_New(DBTxnObject, &DBTxn_Type); if (self == NULL) return NULL; + Py_INCREF(myenv); + self->env = (PyObject*)myenv; #ifdef HAVE_WEAKREF self->in_weakreflist = NULL; #endif @@ -938,11 +941,10 @@ #else err = txn_begin(myenv->db_env, parent, &(self->txn), flags); #endif - /* TODO add a weakref(self) to the self->myenvobj->open_child_weakrefs - * list so that a DBEnv can refuse to close without aborting any open - * open DBTxns and closing any open DBs first. */ MYDB_END_ALLOW_THREADS; if (makeDBError(err)) { + Py_DECREF(self->env); + PyObject_Del(self); self = NULL; } return self; @@ -973,6 +975,7 @@ } #endif + Py_DECREF(self->env); PyObject_Del(self); } From python-checkins at python.org Wed Jan 25 06:29:19 2006 From: python-checkins at python.org (neal.norwitz) Date: Wed, 25 Jan 2006 06:29:19 +0100 (CET) Subject: [Python-checkins] r42178 - in python/branches/release24-maint: Lib/bsddb/test/test_1413192.py Lib/bsddb/test/test_all.py Lib/test/test_bsddb3.py Misc/NEWS Modules/_bsddb.c Message-ID: <20060125052919.CBD891E4002@bag.python.org> Author: neal.norwitz Date: Wed Jan 25 06:29:17 2006 New Revision: 42178 Added: python/branches/release24-maint/Lib/bsddb/test/test_1413192.py - copied unchanged from r42177, python/trunk/Lib/bsddb/test/test_1413192.py Modified: python/branches/release24-maint/Lib/bsddb/test/test_all.py python/branches/release24-maint/Lib/test/test_bsddb3.py python/branches/release24-maint/Misc/NEWS python/branches/release24-maint/Modules/_bsddb.c Log: Backport: Fix bug #1413192, fix seg fault in bsddb if a txn was deleted 
before the env. Modified: python/branches/release24-maint/Lib/bsddb/test/test_all.py ============================================================================== --- python/branches/release24-maint/Lib/bsddb/test/test_all.py (original) +++ python/branches/release24-maint/Lib/bsddb/test/test_all.py Wed Jan 25 06:29:17 2006 @@ -46,6 +46,12 @@ def suite(): + try: + # this is special, it used to segfault the interpreter + import test_1413192 + except: + pass + test_modules = [ 'test_associate', 'test_basics', Modified: python/branches/release24-maint/Lib/test/test_bsddb3.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_bsddb3.py (original) +++ python/branches/release24-maint/Lib/test/test_bsddb3.py Wed Jan 25 06:29:17 2006 @@ -22,6 +22,12 @@ def suite(): + try: + # this is special, it used to segfault the interpreter + import bsddb.test.test_1413192 + except: + pass + test_modules = [ 'test_associate', 'test_basics', Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Wed Jan 25 06:29:17 2006 @@ -208,6 +208,9 @@ Extension Modules ----------------- +- Bug #1413192, fix seg fault in bsddb if a transaction was deleted + before the env. + - Bug #1402308, (possible) segfault when using mmap.mmap(-1, ...) - Bug #1400822, _curses over{lay,write} doesn't work when passing 6 ints. 
Modified: python/branches/release24-maint/Modules/_bsddb.c ============================================================================== --- python/branches/release24-maint/Modules/_bsddb.c (original) +++ python/branches/release24-maint/Modules/_bsddb.c Wed Jan 25 06:29:17 2006 @@ -265,6 +265,7 @@ typedef struct { PyObject_HEAD DB_TXN* txn; + PyObject *env; #ifdef HAVE_WEAKREF PyObject *in_weakreflist; /* List of weak references */ #endif @@ -921,6 +922,8 @@ DBTxnObject* self = PyObject_New(DBTxnObject, &DBTxn_Type); if (self == NULL) return NULL; + Py_INCREF(myenv); + self->env = (PyObject*)myenv; #ifdef HAVE_WEAKREF self->in_weakreflist = NULL; #endif @@ -931,11 +934,10 @@ #else err = txn_begin(myenv->db_env, parent, &(self->txn), flags); #endif - /* TODO add a weakref(self) to the self->myenvobj->open_child_weakrefs - * list so that a DBEnv can refuse to close without aborting any open - * open DBTxns and closing any open DBs first. */ MYDB_END_ALLOW_THREADS; if (makeDBError(err)) { + Py_DECREF(self->env); + PyObject_Del(self); self = NULL; } return self; @@ -966,6 +968,7 @@ } #endif + Py_DECREF(self->env); PyObject_Del(self); } From python-checkins at python.org Wed Jan 25 06:42:23 2006 From: python-checkins at python.org (neal.norwitz) Date: Wed, 25 Jan 2006 06:42:23 +0100 (CET) Subject: [Python-checkins] r42179 - python/trunk/Mac/Modules/file/filesupport.py Message-ID: <20060125054223.4CF271E4002@bag.python.org> Author: neal.norwitz Date: Wed Jan 25 06:42:22 2006 New Revision: 42179 Modified: python/trunk/Mac/Modules/file/filesupport.py Log: mwh pointed out _Filemodule.c is generated. So to fix the memory leak this file (the source) must be fixed. Why isn't there a comment at the top saying the file is generated and why are both the source and generated file checked in? Bootstrap problem? Will backport. 
Modified: python/trunk/Mac/Modules/file/filesupport.py ============================================================================== --- python/trunk/Mac/Modules/file/filesupport.py (original) +++ python/trunk/Mac/Modules/file/filesupport.py Wed Jan 25 06:42:22 2006 @@ -302,11 +302,10 @@ char *path = NULL; if (!PyArg_Parse(v, "et", Py_FileSystemDefaultEncoding, &path)) return 0; - if ( (err=FSPathMakeRef(path, fsr, NULL)) ) { + if ( (err=FSPathMakeRef(path, fsr, NULL)) ) PyMac_Error(err); - return 0; - } - return 1; + PyMem_Free(path); + return !err; } /* XXXX Should try unicode here too */ /* Otherwise we try to go via an FSSpec */ From python-checkins at python.org Wed Jan 25 06:43:54 2006 From: python-checkins at python.org (neal.norwitz) Date: Wed, 25 Jan 2006 06:43:54 +0100 (CET) Subject: [Python-checkins] r42180 - python/branches/release24-maint/Mac/Modules/file/filesupport.py Message-ID: <20060125054354.6BEE71E4002@bag.python.org> Author: neal.norwitz Date: Wed Jan 25 06:43:53 2006 New Revision: 42180 Modified: python/branches/release24-maint/Mac/Modules/file/filesupport.py Log: Backport: mwh pointed out _Filemodule.c is generated. So to fix the memory leak this file (the source) must be fixed. 
Modified: python/branches/release24-maint/Mac/Modules/file/filesupport.py ============================================================================== --- python/branches/release24-maint/Mac/Modules/file/filesupport.py (original) +++ python/branches/release24-maint/Mac/Modules/file/filesupport.py Wed Jan 25 06:43:53 2006 @@ -255,11 +255,10 @@ char *path = NULL; if (!PyArg_Parse(v, "et", Py_FileSystemDefaultEncoding, &path)) return NULL; - if ( (err=FSPathMakeRef(path, fsr, NULL)) ) { + if ( (err=FSPathMakeRef(path, fsr, NULL)) ) PyMac_Error(err); - return 0; - } - return 1; + PyMem_Free(path); + return !err; } /* XXXX Should try unicode here too */ /* Otherwise we try to go via an FSSpec */ From nnorwitz at gmail.com Wed Jan 25 06:45:51 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Tue, 24 Jan 2006 21:45:51 -0800 Subject: [Python-checkins] r42149 - python/trunk/Mac/Modules/file/_Filemodule.c In-Reply-To: <2my815lhd4.fsf@starship.python.net> References: <20060123072530.705231E4039@bag.python.org> <2my815lhd4.fsf@starship.python.net> Message-ID: On 1/24/06, Michael Hudson wrote: > > > Modified: > > python/trunk/Mac/Modules/file/_Filemodule.c > > Isn't this file autogenerated? I think you probably want to make the > same change in filesupport.py. Thanks. I can't test these changes myself, so I want to make the minimal changes. As I said in the checkin: * Why isn't there a comment at the top saying the file is generated and why are both the source and generated file checked in? Bootstrap problem? n From mala4000 at bol.com.br Wed Jan 25 06:44:35 2006 From: mala4000 at bol.com.br (Lidiana Alves) Date: Wed, 25 Jan 2006 02:44:35 -0300 Subject: [Python-checkins] Modelos prontos de cartas e e-mails comerciais Message-ID: <20060125054552.B6CE278A6@sankara2.bol.com.br> Modelos prontos de cartas e e-mails comerciais. 
Visite o site: http://www.gueb.de/cartascomerciais E veja alguns dos modelos abaixo: Procuração Carta de Recomendação Convite para Exposição ou Feira AGRADECIMENTOS E CONDOLÊNCIAS • Agradecimento de convite e felicitações; • Agradecimento e convite para solenidade; • Agradecimento de mensagem de pêsames; • Agradecimento de pedido; • Agradecimento e boas vindas a cliente novo; • Agradecimento por mensagem de felicitação; • Confraternização; • Congratulações; • Cumprimentos por resultados comerciais; • Felicitações pessoais; • Pêsames; http://www.gueb.de/cartascomerciais • Votos de boas festas Voltar ao topo http://www.gueb.de/cartascomerciais CARTAS DE RECLAMAÇÃO • Reclamação de compra de produto; • Reclamação por atraso; • Reclamação por aumento de preço; • Reclamação por deficiência técnica; • Reclamação por demora na entrega; • Reclamação por divergência; • Respostas a reclamações; Voltar ao topo COMUNICADOS E AVISOS • Advertência a funcionário; • Aviso de aumento de preços; • Aviso de incorporação da empresa; • Aviso de lançamento de produto e serviço; • Aviso de mudança de endereço; • Aviso de ocorrência de acidente; • Aviso de término de contrato; • Aviso genérico; • Comunicação de atraso no envio de mercadorias; • Comunicação de devolução de duplicata; • Comunicação de devolução de mercadoria; • Comunicação de envio de mercadorias; • Comunicação de envio de parte do pedido; • Comunicação de extravio de mercadorias; • Comunicação de férias coletivas; • Comunicação de liquidação de débito; • Comunicação de novo serviço de televendas; • Comunicação de reunião; • Confirmação de pedido; • Resposta ao comunicado de reunião; Voltar ao topo http://www.gueb.de/cartascomerciais EMPREGO • Aviso prévio de dispensa de empregado: 1, 2, e 3; • Carta de recomendação; • Pedido de demissão: 1 e 2; • Solicitação de emprego: 1, 2 e 3; • Solicitação de estágio; Voltar ao topo ATESTADOS E DECLARAÇÕES • Atestado de bons antecedentes; • Atestado médico; • 
Declaração negativa de vínculo empregatício; • Declaração para cancelamento de protesto; • Declaração para fins escolares; Voltar ao topo * Agradecimentos e condolências * Atestados e Declarações * Cartas de Cobranças * Cartas de Reclamação * Cartas em Inglês * Comunicados e Avisos * Convites * Documentos * Emprego * Propostas * Solicitações e pedidos * Viagem CARTAS DE COBRANÇA • Cartas de cobrança: 1, 2, 3, 4, 5, 6, 7 e 8; • Encaminhamento de cobrança a protesto; • Oferecimento de serviço de cobrança; • Recebimento de débito pendente; Voltar ao topo CARTAS EM INGLÊS • Cancelamento de pedido; • Carta de demissão; • Carta de referência; • Curriculum vitae; • Pedido de produto: 1 e 2; • Reclamação de assinatura de publicação; • Remessa de valores; • Resposta a pedido de produto; • Resposta a solicitação de emprego; • Resposta a solicitação de informações; • Resposta a solicitação de preços; • Solicitação de emprego; • Solicitação de informações comerciais; • Solicitação de licença; • Solicitação de preços; Voltar ao topo CONVITES http://www.gueb.de/cartascomerciais • Convite para batizado; • Convite para evento social; • Convite para exposição ou feira; • Convite para lançamento de produto; • Resposta negativa a convite; • Resposta positiva a convite; Voltar ao topo DOCUMENTOS • Ata; • Contrato de locação de imóvel; • Contrato firmado acordo; • Contrato social; • Edital de convocação; • Procuração; • Recibo de venda de automóvel; Voltar ao topo PROPOSTAS • Proposta de abertura de conta corrente; • Proposta de prestação de serviços: 1 e 2; • Proposta de representação comercial: 1 e 2; • Proposta para ocupação de cargo; • Proposta para recuperação de clientes; • Resposta negativa à proposta de representação: 1 e 2; • Resposta positiva à proposta de representação: 1 e 2; Voltar ao topo http://www.gueb.de/cartascomerciais SOLICITAÇÕES E PEDIDOS • Pedido de desculpas; • Pedido de mercadorias; • Resposta a pedido de carta de apresentação; • 
Resposta a solicitação de cópias de documentos; • Resposta a solicitação de orçamento; • Resposta negativa a solicitação de informações comerciais; • Resposta positiva a solicitação de informações comerciais; • Solicitação de atestado de Idoneidade Financeira; • Solicitação de catálogos de preços; • Solicitação de crédito; • Solicitação de informações comerciais; • Solicitação de informações sobre curso; • Solicitação de listas de preços; • Solicitações de referências pessoais; • Suspensão de pedido de mercadoria; Voltar ao topo VIAGEM • Informações sobre requisitos de viagem; • Pedido de reserva em hotel; • Recuperação de bagagem extraviada; • Reclamação de maus tratos à bagagem; • Recuperação de objeto esquecido em hotel; • Reserva de passagens; • Roteiro turístico. http://www.gueb.de/cartascomerciais From python-checkins at python.org Wed Jan 25 08:20:47 2006 From: python-checkins at python.org (neal.norwitz) Date: Wed, 25 Jan 2006 08:20:47 +0100 (CET) Subject: [Python-checkins] r42181 - python/trunk/Lib/test/test_bsddb3.py Message-ID: <20060125072047.9D5CD1E4004@bag.python.org> Author: neal.norwitz Date: Wed Jan 25 08:20:47 2006 New Revision: 42181 Modified: python/trunk/Lib/test/test_bsddb3.py Log: Remove generated test db files Modified: python/trunk/Lib/test/test_bsddb3.py ============================================================================== --- python/trunk/Lib/test/test_bsddb3.py (original) +++ python/trunk/Lib/test/test_bsddb3.py Wed Jan 25 08:20:47 2006 @@ -4,7 +4,7 @@ """ import sys import unittest -from test.test_support import requires, verbose, run_suite +from test.test_support import requires, verbose, run_suite, unlink # When running as a script instead of within the regrtest framework, skip the # requires test, since it's obvious we want to run them. 
@@ -26,7 +26,8 @@ # this is special, it used to segfault the interpreter import bsddb.test.test_1413192 except: - pass + for f in ['__db.001', '__db.002', '__db.003', 'log.0000000001']: + unlink(f) test_modules = [ 'test_associate', From python-checkins at python.org Wed Jan 25 08:22:02 2006 From: python-checkins at python.org (neal.norwitz) Date: Wed, 25 Jan 2006 08:22:02 +0100 (CET) Subject: [Python-checkins] r42182 - python/branches/release24-maint/Lib/test/test_bsddb3.py Message-ID: <20060125072202.5FEDE1E4004@bag.python.org> Author: neal.norwitz Date: Wed Jan 25 08:22:01 2006 New Revision: 42182 Modified: python/branches/release24-maint/Lib/test/test_bsddb3.py Log: Remove generated test db files Modified: python/branches/release24-maint/Lib/test/test_bsddb3.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_bsddb3.py (original) +++ python/branches/release24-maint/Lib/test/test_bsddb3.py Wed Jan 25 08:22:01 2006 @@ -4,7 +4,7 @@ """ import sys import unittest -from test.test_support import requires, verbose, run_suite +from test.test_support import requires, verbose, run_suite, unlink # When running as a script instead of within the regrtest framework, skip the # requires test, since it's obvious we want to run them. 
@@ -26,7 +26,8 @@ # this is special, it used to segfault the interpreter import bsddb.test.test_1413192 except: - pass + for f in ['__db.001', '__db.002', '__db.003', 'log.0000000001']: + unlink(f) test_modules = [ 'test_associate', From python-checkins at python.org Wed Jan 25 08:58:22 2006 From: python-checkins at python.org (neal.norwitz) Date: Wed, 25 Jan 2006 08:58:22 +0100 (CET) Subject: [Python-checkins] r42183 - python/branches/release24-maint/Lib/test/test_bsddb3.py Message-ID: <20060125075822.CEB051E4002@bag.python.org> Author: neal.norwitz Date: Wed Jan 25 08:58:22 2006 New Revision: 42183 Modified: python/branches/release24-maint/Lib/test/test_bsddb3.py Log: Remove file even if non-existent Modified: python/branches/release24-maint/Lib/test/test_bsddb3.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_bsddb3.py (original) +++ python/branches/release24-maint/Lib/test/test_bsddb3.py Wed Jan 25 08:58:22 2006 @@ -2,9 +2,10 @@ """ Run all test cases. """ +import os import sys import unittest -from test.test_support import requires, verbose, run_suite, unlink +from test.test_support import requires, verbose, run_suite # When running as a script instead of within the regrtest framework, skip the # requires test, since it's obvious we want to run them. 
@@ -27,7 +28,10 @@ import bsddb.test.test_1413192 except: for f in ['__db.001', '__db.002', '__db.003', 'log.0000000001']: - unlink(f) + try: + os.unlink(f) + except OSError: + pass test_modules = [ 'test_associate', From python-checkins at python.org Wed Jan 25 09:18:44 2006 From: python-checkins at python.org (neal.norwitz) Date: Wed, 25 Jan 2006 09:18:44 +0100 (CET) Subject: [Python-checkins] r42184 - python/branches/release24-maint/setup.py Message-ID: <20060125081844.A1B681E4002@bag.python.org> Author: neal.norwitz Date: Wed Jan 25 09:18:43 2006 New Revision: 42184 Modified: python/branches/release24-maint/setup.py Log: Find bdb4 correctly on gentoo systems (this matches head) Modified: python/branches/release24-maint/setup.py ============================================================================== --- python/branches/release24-maint/setup.py (original) +++ python/branches/release24-maint/setup.py Wed Jan 25 09:18:43 2006 @@ -509,6 +509,7 @@ # 4.x minor number specific paths for x in (0,1,2,3,4): db_inc_paths.append('/usr/include/db4%d' % x) + db_inc_paths.append('/usr/include/db4.%d' % x) db_inc_paths.append('/usr/local/BerkeleyDB.4.%d/include' % x) db_inc_paths.append('/usr/local/include/db4%d' % x) db_inc_paths.append('/pkg/db-4.%d/include' % x) From python-checkins at python.org Wed Jan 25 09:39:40 2006 From: python-checkins at python.org (neal.norwitz) Date: Wed, 25 Jan 2006 09:39:40 +0100 (CET) Subject: [Python-checkins] r42185 - python/trunk/Lib/test/test_socket_ssl.py Message-ID: <20060125083940.E45321E4002@bag.python.org> Author: neal.norwitz Date: Wed Jan 25 09:39:35 2006 New Revision: 42185 Modified: python/trunk/Lib/test/test_socket_ssl.py Log: There was a race condition where the connector would try to connect before the listener was ready (on gentoo x86 buildslave). This caused the listener to not exit normally since nobody connected to it (waited in accept()). The exception was raised in the other thread and the test failed. 
This fix doesn't completely eliminate the race, but should make it near impossible to trigger. Hopefully it's good enough. Modified: python/trunk/Lib/test/test_socket_ssl.py ============================================================================== --- python/trunk/Lib/test/test_socket_ssl.py (original) +++ python/trunk/Lib/test/test_socket_ssl.py Wed Jan 25 09:39:35 2006 @@ -35,6 +35,7 @@ # Some random port to connect to. PORT = 9934 + listener_ready = threading.Event() listener_gone = threading.Event() # `listener` runs in a thread. It opens a socket listening on PORT, and @@ -45,11 +46,13 @@ s = socket.socket() s.bind(('', PORT)) s.listen(5) + listener_ready.set() s.accept() s = None # reclaim the socket object, which also closes it listener_gone.set() def connector(): + listener_ready.wait() s = socket.socket() s.connect(('localhost', PORT)) listener_gone.wait() From tim.peters at gmail.com Thu Jan 26 02:35:17 2006 From: tim.peters at gmail.com (Tim Peters) Date: Wed, 25 Jan 2006 20:35:17 -0500 Subject: [Python-checkins] r42185 - python/trunk/Lib/test/test_socket_ssl.py In-Reply-To: <20060125083940.E45321E4002@bag.python.org> References: <20060125083940.E45321E4002@bag.python.org> Message-ID: <1f7befae0601251735k3b000b97u6ba0f9a03af80ad4@mail.gmail.com> [neal.norwitz] > Modified: > python/trunk/Lib/test/test_socket_ssl.py > Log: > There was a race condition where the connector would try to connect > before the listener was ready (on gentoo x86 buildslave). This > caused the listener to not exit normally since nobody connected to it > (waited in accept()). The exception was raised in the other thread > and the test failed. Good catch! Thank you. > This fix doesn't completely eliminate the race, but should make it > near impossible to trigger. Hopefully it's good enough. Which race do you have in mind? 
The server socket doesn't need to do .accept() before a client socket can connect -- the server socket only needs to have done .listen() for a connection to succeed. > + listener_ready = threading.Event() ... [in the server] > s = socket.socket() > s.bind(('', PORT)) > s.listen(5) > + listener_ready.set() > s.accept() ... [in the client] > def connector(): > + listener_ready.wait() > s = socket.socket() > s.connect(('localhost', PORT)) Because the server doesn't set listener_ready until after the server has done listen(), and the client waits for that event, it "should be" 100% reliable that the client's connect() succeeds. Or do you have some other race in mind? From nnorwitz at gmail.com Thu Jan 26 02:54:05 2006 From: nnorwitz at gmail.com (Neal Norwitz) Date: Wed, 25 Jan 2006 17:54:05 -0800 Subject: [Python-checkins] r42185 - python/trunk/Lib/test/test_socket_ssl.py In-Reply-To: <1f7befae0601251735k3b000b97u6ba0f9a03af80ad4@mail.gmail.com> References: <20060125083940.E45321E4002@bag.python.org> <1f7befae0601251735k3b000b97u6ba0f9a03af80ad4@mail.gmail.com> Message-ID: On 1/25/06, Tim Peters wrote: > > Because the server doesn't set listener_ready until after the server > has done listen(), and the client waits for that event, it "should > be" 100% reliable that the client's connect() succeeds. > > Or do you have some other race in mind? That's what I was thinking of. I thought you had to be accept()ing prior to connect() working. I thought listen() only sets the # of outstanding connections allowed (basically internal buffer). But if the listen() is sufficient, I agree there is no race. 
n From python-checkins at python.org Thu Jan 26 06:19:57 2006 From: python-checkins at python.org (neal.norwitz) Date: Thu, 26 Jan 2006 06:19:57 +0100 (CET) Subject: [Python-checkins] r42187 - python/branches/release24-maint/Modules/_bsddb.c Message-ID: <20060126051957.8D6451E4002@bag.python.org> Author: neal.norwitz Date: Thu Jan 26 06:19:56 2006 New Revision: 42187 Modified: python/branches/release24-maint/Modules/_bsddb.c Log: Backport 42173: get bsddb test_associate to pass with BSDDB 4.1 Modified: python/branches/release24-maint/Modules/_bsddb.c ============================================================================== --- python/branches/release24-maint/Modules/_bsddb.c (original) +++ python/branches/release24-maint/Modules/_bsddb.c Thu Jan 26 06:19:56 2006 @@ -906,7 +906,7 @@ } #endif - if (self->db_env) { + if (self->db_env && !self->closed) { MYDB_BEGIN_ALLOW_THREADS; self->db_env->close(self->db_env, 0); MYDB_END_ALLOW_THREADS; @@ -2996,7 +2996,7 @@ else pkeyObj = PyString_FromStringAndSize(pkey.data, pkey.size); - if (flags & DB_SET_RECNO) /* return key, pkey and data */ + if (key.data && key.size) /* return key, pkey and data */ { PyObject *keyObj; int type = _DB_get_type(self->mydb); From python-checkins at python.org Thu Jan 26 19:50:03 2006 From: python-checkins at python.org (barry.warsaw) Date: Thu, 26 Jan 2006 19:50:03 +0100 (CET) Subject: [Python-checkins] r42188 - python/branches/release24-maint/Include/objimpl.h Message-ID: <20060126185003.1FDA61E4013@bag.python.org> Author: barry.warsaw Date: Thu Jan 26 19:49:57 2006 New Revision: 42188 Modified: python/branches/release24-maint/Include/objimpl.h Log: Fix PyGC_Collect() to be exported from the built DLL on Windows. (Fix given by Matt Messier). 
Modified: python/branches/release24-maint/Include/objimpl.h ============================================================================== --- python/branches/release24-maint/Include/objimpl.h (original) +++ python/branches/release24-maint/Include/objimpl.h Thu Jan 26 19:49:57 2006 @@ -229,7 +229,7 @@ */ /* C equivalent of gc.collect(). */ -long PyGC_Collect(void); +PyAPI_FUNC(long) PyGC_Collect(void); /* Test if a type has a GC head */ #define PyType_IS_GC(t) PyType_HasFeature((t), Py_TPFLAGS_HAVE_GC) From python-checkins at python.org Thu Jan 26 19:59:07 2006 From: python-checkins at python.org (barry.warsaw) Date: Thu, 26 Jan 2006 19:59:07 +0100 (CET) Subject: [Python-checkins] r42189 - python/trunk/Include/objimpl.h Message-ID: <20060126185907.747D91E4002@bag.python.org> Author: barry.warsaw Date: Thu Jan 26 19:59:06 2006 New Revision: 42189 Modified: python/trunk/Include/objimpl.h Log: Fix PyGC_Collect() to be exported from the built DLL on Windows. (Fix given by Matt Messier). Modified: python/trunk/Include/objimpl.h ============================================================================== --- python/trunk/Include/objimpl.h (original) +++ python/trunk/Include/objimpl.h Thu Jan 26 19:59:06 2006 @@ -229,7 +229,7 @@ */ /* C equivalent of gc.collect(). 
*/ -long PyGC_Collect(void); +PyAPI_FUNC(long) PyGC_Collect(void); /* Test if a type has a GC head */ #define PyType_IS_GC(t) PyType_HasFeature((t), Py_TPFLAGS_HAVE_GC) From python-checkins at python.org Thu Jan 26 23:08:13 2006 From: python-checkins at python.org (phillip.eby) Date: Thu, 26 Jan 2006 23:08:13 +0100 (CET) Subject: [Python-checkins] r42191 - in sandbox/trunk/setuptools: EasyInstall.txt setuptools/command/easy_install.py Message-ID: <20060126220813.2A8C61E4002@bag.python.org> Author: phillip.eby Date: Thu Jan 26 23:08:07 2006 New Revision: 42191 Modified: sandbox/trunk/setuptools/EasyInstall.txt sandbox/trunk/setuptools/setuptools/command/easy_install.py Log: Expand ``$variables`` used in the ``--site-dirs``, ``--build-directory``, ``--install-dir``, and ``--script-dir`` options, whether on the command line or in configuration files. Modified: sandbox/trunk/setuptools/EasyInstall.txt ============================================================================== --- sandbox/trunk/setuptools/EasyInstall.txt (original) +++ sandbox/trunk/setuptools/EasyInstall.txt Thu Jan 26 23:08:07 2006 @@ -963,6 +963,13 @@ time out or be missing a file. 0.6a10 + * Expand ``$variables`` used in the ``--site-dirs``, ``--build-directory``, + ``--install-dir``, and ``--script-dir`` options, whether on the command line + or in configuration files. + + * Improved SourceForge mirror processing to work faster and be less affected + by transient HTML changes made by SourceForge. + * PyPI searches now use the exact spelling of requirements specified on the command line or in a project's ``install_requires``. Previously, a normalized form of the name was used, which could lead to unnecessary @@ -976,7 +983,6 @@ page). 0.6a9 - * Fixed ``.pth`` file processing picking up nested eggs (i.e. ones inside "baskets") when they weren't explicitly listed in the ``.pth`` file. 
Modified: sandbox/trunk/setuptools/setuptools/command/easy_install.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/easy_install.py (original) +++ sandbox/trunk/setuptools/setuptools/command/easy_install.py Thu Jan 26 23:08:07 2006 @@ -122,6 +122,7 @@ os.unlink(filename) def finalize_options(self): + self._expand('install_dir','script_dir','build_directory','site_dirs') # If a non-default installation directory was specified, default the # script directory to match it. if self.script_dir is None: @@ -154,7 +155,6 @@ ) else: self.all_site_dirs.append(normalize_path(d)) - instdir = normalize_path(self.install_dir or self.all_site_dirs[-1]) if instdir in self.all_site_dirs: if self.pth_file is None: @@ -887,15 +887,15 @@ finally: log.set_verbosity(self.verbose) # restore original verbosity - - - - - - - - - + def _expand(self, *attrs): + config_vars = self.get_finalized_command('install').config_vars + for attr in attrs: + val = getattr(self, attr) + if val is not None: + if os.name == 'posix': + val = os.path.expanduser(val) + val = subst_vars(val, config_vars) + setattr(self, attr, val) From python-checkins at python.org Fri Jan 27 08:05:41 2006 From: python-checkins at python.org (gregory.p.smith) Date: Fri, 27 Jan 2006 08:05:41 +0100 (CET) Subject: [Python-checkins] r42192 - python/trunk/Modules/_bsddb.c Message-ID: <20060127070541.6C29C1E4024@bag.python.org> Author: gregory.p.smith Date: Fri Jan 27 08:05:40 2006 New Revision: 42192 Modified: python/trunk/Modules/_bsddb.c Log: Add wrapper for DBEnv.set_tx_timeout method to allow time based DB_RECOVER Modified: python/trunk/Modules/_bsddb.c ============================================================================== --- python/trunk/Modules/_bsddb.c (original) +++ python/trunk/Modules/_bsddb.c Fri Jan 27 08:05:40 2006 @@ -97,7 +97,7 @@ #error "eek! 
DBVER can't handle minor versions > 9" #endif -#define PY_BSDDB_VERSION "4.4.1" +#define PY_BSDDB_VERSION "4.4.2" static char *rcs_id = "$Id$"; @@ -4164,9 +4164,23 @@ return NULL; CHECK_ENV_NOT_CLOSED(self); - MYDB_BEGIN_ALLOW_THREADS; err = self->db_env->set_tx_max(self->db_env, max); - MYDB_END_ALLOW_THREADS; + RETURN_IF_ERR(); + RETURN_NONE(); +} + + +static PyObject* +DBEnv_set_tx_timestamp(DBEnvObject* self, PyObject* args) +{ + int err; + time_t stamp; + + if (!PyArg_ParseTuple(args, "i:set_tx_timestamp", &stamp)) + return NULL; + CHECK_ENV_NOT_CLOSED(self); + + err = self->db_env->set_tx_timestamp(self->db_env, &stamp); RETURN_IF_ERR(); RETURN_NONE(); } @@ -4723,6 +4737,7 @@ {"txn_checkpoint", (PyCFunction)DBEnv_txn_checkpoint, METH_VARARGS}, {"txn_stat", (PyCFunction)DBEnv_txn_stat, METH_VARARGS}, {"set_tx_max", (PyCFunction)DBEnv_set_tx_max, METH_VARARGS}, + {"set_tx_timestamp", (PyCFunction)DBEnv_set_tx_timestamp, METH_VARARGS}, {"lock_detect", (PyCFunction)DBEnv_lock_detect, METH_VARARGS}, {"lock_get", (PyCFunction)DBEnv_lock_get, METH_VARARGS}, {"lock_id", (PyCFunction)DBEnv_lock_id, METH_VARARGS}, From python-checkins at python.org Fri Jan 27 08:06:15 2006 From: python-checkins at python.org (gregory.p.smith) Date: Fri, 27 Jan 2006 08:06:15 +0100 (CET) Subject: [Python-checkins] r42193 - in python/trunk/Lib/bsddb: dbobj.py test/test_basics.py Message-ID: <20060127070615.E5D561E4002@bag.python.org> Author: gregory.p.smith Date: Fri Jan 27 08:06:15 2006 New Revision: 42193 Modified: python/trunk/Lib/bsddb/dbobj.py python/trunk/Lib/bsddb/test/test_basics.py Log: Add wrapper for DBEnv.set_tx_timeout method to allow time based DB_RECOVER (test cases and dbobj wrapping) Modified: python/trunk/Lib/bsddb/dbobj.py ============================================================================== --- python/trunk/Lib/bsddb/dbobj.py (original) +++ python/trunk/Lib/bsddb/dbobj.py Fri Jan 27 08:06:15 2006 @@ -77,6 +77,8 @@ return apply(self._cobj.txn_stat, args, 
kwargs) def set_tx_max(self, *args, **kwargs): return apply(self._cobj.set_tx_max, args, kwargs) + def set_tx_timestamp(self, *args, **kwargs): + return apply(self._cobj.set_tx_timestamp, args, kwargs) def lock_detect(self, *args, **kwargs): return apply(self._cobj.lock_detect, args, kwargs) def lock_get(self, *args, **kwargs): Modified: python/trunk/Lib/bsddb/test/test_basics.py ============================================================================== --- python/trunk/Lib/bsddb/test/test_basics.py (original) +++ python/trunk/Lib/bsddb/test/test_basics.py Fri Jan 27 08:06:15 2006 @@ -11,6 +11,7 @@ import tempfile from pprint import pprint import unittest +import time try: # For Pythons w/distutils pybsddb @@ -64,6 +65,8 @@ try: self.env = db.DBEnv() self.env.set_lg_max(1024*1024) + self.env.set_tx_max(30) + self.env.set_tx_timestamp(int(time.time())) self.env.set_flags(self.envsetflags, 1) self.env.open(homeDir, self.envflags | db.DB_CREATE) tempfile.tempdir = homeDir From python-checkins at python.org Fri Jan 27 16:18:40 2006 From: python-checkins at python.org (jeremy.hylton) Date: Fri, 27 Jan 2006 16:18:40 +0100 (CET) Subject: [Python-checkins] r42194 - in python/trunk: Lib/test/test_genexps.py Lib/test/test_syntax.py Parser/Python.asdl Python/ast.c Message-ID: <20060127151840.5A9441E4002@bag.python.org> Author: jeremy.hylton Date: Fri Jan 27 16:18:39 2006 New Revision: 42194 Modified: python/trunk/Lib/test/test_genexps.py python/trunk/Lib/test/test_syntax.py python/trunk/Parser/Python.asdl python/trunk/Python/ast.c Log: Improved handling of syntax errors. Expand set of errors caught in set_context(). Some new errors, some old error messages changed for consistency. Fixed error checking in generator expression code. The first set of tests were impossible condition given the grammar. In general, the ast code uses REQ() for those sanity checks. Fix some error handling for augmented assignments. 
As comments in the code explain, set_context() ought to work here, but I got unexpected crashes when I tried it. Should come back to this. Add note to Grammar that yield expression is a special case. Add doctest cases for SyntaxErrors raised by ast.c. Modified: python/trunk/Lib/test/test_genexps.py ============================================================================== --- python/trunk/Lib/test/test_genexps.py (original) +++ python/trunk/Lib/test/test_genexps.py Fri Jan 27 16:18:39 2006 @@ -137,7 +137,7 @@ >>> (y for y in (1,2)) = 10 Traceback (most recent call last): ... - SyntaxError: assignment to generator expression not possible (, line 1) + SyntaxError: can't assign to generator expression (, line 1) >>> (y for y in (1,2)) += 10 Traceback (most recent call last): Modified: python/trunk/Lib/test/test_syntax.py ============================================================================== --- python/trunk/Lib/test/test_syntax.py (original) +++ python/trunk/Lib/test/test_syntax.py Fri Jan 27 16:18:39 2006 @@ -1,3 +1,239 @@ +"""This module tests SyntaxErrors. + +Here's an example of the sort of thing that is tested. + +>>> def f(x): +... global x +Traceback (most recent call last): +SyntaxError: name 'x' is local and global + +The tests are all raise SyntaxErrors. They were created by checking +each C call that raises SyntaxError. There are several modules that +raise these exceptions-- ast.c, compile.c, future.c, pythonrun.c, and +symtable.c. + +The parser itself outlaws a lot of invalid syntax. None of these +errors are tested here at the moment. We should add some tests; since +there are infinitely many programs with invalid syntax, we would need +to be judicious in selecting some. + +The compiler generates a synthetic module name for code executed by +doctest. 
Since all the code comes from the same module, a suffix like +[1] is appended to the module name, As a consequence, changing the +order of tests in this module means renumbering all the errors after +it. (Maybe we should enable the ellipsis option for these tests.) + +In ast.c, syntax errors are raised by calling ast_error(). + +Errors from set_context(): + +TODO(jhylton): "assignment to None" is inconsistent with other messages + +>>> obj.None = 1 +Traceback (most recent call last): +SyntaxError: assignment to None (, line 1) + +>>> None = 1 +Traceback (most recent call last): +SyntaxError: assignment to None (, line 1) + +It's a syntax error to assign to the empty tuple. Why isn't it an +error to assign to the empty list? It will always raise some error at +runtime. + +>>> () = 1 +Traceback (most recent call last): +SyntaxError: can't assign to () (, line 1) + +>>> f() = 1 +Traceback (most recent call last): +SyntaxError: can't assign to function call (, line 1) + +>>> del f() +Traceback (most recent call last): +SyntaxError: can't delete function call (, line 1) + +>>> a + 1 = 2 +Traceback (most recent call last): +SyntaxError: can't assign to operator (, line 1) + +>>> (x for x in x) = 1 +Traceback (most recent call last): +SyntaxError: can't assign to generator expression (, line 1) + +>>> 1 = 1 +Traceback (most recent call last): +SyntaxError: can't assign to literal (, line 1) + +>>> "abc" = 1 +Traceback (most recent call last): +SyntaxError: can't assign to literal (, line 1) + +>>> `1` = 1 +Traceback (most recent call last): +SyntaxError: can't assign to repr (, line 1) + +If the left-hand side of an assignment is a list or tuple, an illegal +expression inside that contain should still cause a syntax error. +This test just checks a couple of cases rather than enumerating all of +them. 
+ +>>> (a, "b", c) = (1, 2, 3) +Traceback (most recent call last): +SyntaxError: can't assign to literal (, line 1) + +>>> [a, b, c + 1] = [1, 2, 3] +Traceback (most recent call last): +SyntaxError: can't assign to operator (, line 1) + + +From compiler_complex_args(): + +>>> def f(None=1): +... pass +Traceback (most recent call last): +SyntaxError: assignment to None (, line 1) + + +From ast_for_arguments(): + +>>> def f(x, y=1, z): +... pass +Traceback (most recent call last): +SyntaxError: non-default argument follows default argument (, line 1) + +>>> def f(x, None): +... pass +Traceback (most recent call last): +SyntaxError: assignment to None (, line 1) + +>>> def f(*None): +... pass +Traceback (most recent call last): +SyntaxError: assignment to None (, line 1) + +>>> def f(**None): +... pass +Traceback (most recent call last): +SyntaxError: assignment to None (, line 1) + + +From ast_for_funcdef(): + +>>> def None(x): +... pass +Traceback (most recent call last): +SyntaxError: assignment to None (, line 1) + + +From ast_for_call(): + +>>> def f(it, *varargs): +... return list(it) +>>> L = range(10) +>>> f(x for x in L) +[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +>>> f(x for x in L, 1) +Traceback (most recent call last): +SyntaxError: Generator expression must be parenthesized if not sole argument (, line 1) +>>> f((x for x in L), 1) +[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + +>>> f(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, +... i12, i13, i14, i15, i16, i17, i18, i19, i20, i21, i22, +... i23, i24, i25, i26, i27, i28, i29, i30, i31, i32, i33, +... i34, i35, i36, i37, i38, i39, i40, i41, i42, i43, i44, +... i45, i46, i47, i48, i49, i50, i51, i52, i53, i54, i55, +... i56, i57, i58, i59, i60, i61, i62, i63, i64, i65, i66, +... i67, i68, i69, i70, i71, i72, i73, i74, i75, i76, i77, +... i78, i79, i80, i81, i82, i83, i84, i85, i86, i87, i88, +... i89, i90, i91, i92, i93, i94, i95, i96, i97, i98, i99, +... i100, i101, i102, i103, i104, i105, i106, i107, i108, +... 
i109, i110, i111, i112, i113, i114, i115, i116, i117, +... i118, i119, i120, i121, i122, i123, i124, i125, i126, +... i127, i128, i129, i130, i131, i132, i133, i134, i135, +... i136, i137, i138, i139, i140, i141, i142, i143, i144, +... i145, i146, i147, i148, i149, i150, i151, i152, i153, +... i154, i155, i156, i157, i158, i159, i160, i161, i162, +... i163, i164, i165, i166, i167, i168, i169, i170, i171, +... i172, i173, i174, i175, i176, i177, i178, i179, i180, +... i181, i182, i183, i184, i185, i186, i187, i188, i189, +... i190, i191, i192, i193, i194, i195, i196, i197, i198, +... i199, i200, i201, i202, i203, i204, i205, i206, i207, +... i208, i209, i210, i211, i212, i213, i214, i215, i216, +... i217, i218, i219, i220, i221, i222, i223, i224, i225, +... i226, i227, i228, i229, i230, i231, i232, i233, i234, +... i235, i236, i237, i238, i239, i240, i241, i242, i243, +... i244, i245, i246, i247, i248, i249, i250, i251, i252, +... i253, i254, i255) +Traceback (most recent call last): +SyntaxError: more than 255 arguments (, line 1) + +The actual error cases counts positional arguments, keyword arguments, +and generator expression arguments separately. This test combines the +three. + +>>> f(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, +... i12, i13, i14, i15, i16, i17, i18, i19, i20, i21, i22, +... i23, i24, i25, i26, i27, i28, i29, i30, i31, i32, i33, +... i34, i35, i36, i37, i38, i39, i40, i41, i42, i43, i44, +... i45, i46, i47, i48, i49, i50, i51, i52, i53, i54, i55, +... i56, i57, i58, i59, i60, i61, i62, i63, i64, i65, i66, +... i67, i68, i69, i70, i71, i72, i73, i74, i75, i76, i77, +... i78, i79, i80, i81, i82, i83, i84, i85, i86, i87, i88, +... i89, i90, i91, i92, i93, i94, i95, i96, i97, i98, i99, +... i100, i101, i102, i103, i104, i105, i106, i107, i108, +... i109, i110, i111, i112, i113, i114, i115, i116, i117, +... i118, i119, i120, i121, i122, i123, i124, i125, i126, +... i127, i128, i129, i130, i131, i132, i133, i134, i135, +... 
i136, i137, i138, i139, i140, i141, i142, i143, i144, +... i145, i146, i147, i148, i149, i150, i151, i152, i153, +... i154, i155, i156, i157, i158, i159, i160, i161, i162, +... i163, i164, i165, i166, i167, i168, i169, i170, i171, +... i172, i173, i174, i175, i176, i177, i178, i179, i180, +... i181, i182, i183, i184, i185, i186, i187, i188, i189, +... i190, i191, i192, i193, i194, i195, i196, i197, i198, +... i199, i200, i201, i202, i203, i204, i205, i206, i207, +... i208, i209, i210, i211, i212, i213, i214, i215, i216, +... i217, i218, i219, i220, i221, i222, i223, i224, i225, +... i226, i227, i228, i229, i230, i231, i232, i233, i234, +... i235, i236, i237, i238, i239, i240, i241, i242, i243, +... (x for x in i244), i245, i246, i247, i248, i249, i250, i251, +... i252=1, i253=1, i254=1, i255=1) +Traceback (most recent call last): +SyntaxError: more than 255 arguments (, line 1) + +>>> f(lambda x: x[0] = 3) +Traceback (most recent call last): +SyntaxError: lambda cannot contain assignment (, line 1) + +The grammar accepts any test (basically, any expression) in the +keyword slot of a call site. Test a few different options. 
+ +>>> f(x()=2) +Traceback (most recent call last): +SyntaxError: keyword can't be an expression (, line 1) +>>> f(a or b=1) +Traceback (most recent call last): +SyntaxError: keyword can't be an expression (, line 1) +>>> f(x.y=1) +Traceback (most recent call last): +SyntaxError: keyword can't be an expression (, line 1) + + +From ast_for_expr_stmt(): + +>>> (x for x in x) += 1 +Traceback (most recent call last): +SyntaxError: augmented assignment to generator expression not possible (, line 1) +>>> None += 1 +Traceback (most recent call last): +SyntaxError: assignment to None (, line 1) +>>> f() += 1 +Traceback (most recent call last): +SyntaxError: illegal expression for augmented assignment (, line 1) +""" + import re import unittest import warnings @@ -56,6 +292,8 @@ def test_main(): test_support.run_unittest(SyntaxTestCase) + from test import test_syntax + test_support.run_doctest(test_syntax, verbosity=True) if __name__ == "__main__": test_main() Modified: python/trunk/Parser/Python.asdl ============================================================================== --- python/trunk/Parser/Python.asdl (original) +++ python/trunk/Parser/Python.asdl Fri Jan 27 16:18:39 2006 @@ -55,6 +55,7 @@ | Dict(expr* keys, expr* values) | ListComp(expr elt, comprehension* generators) | GeneratorExp(expr elt, comprehension* generators) + -- the grammar constrains where yield expressions can occur | Yield(expr? value) -- need sequences for compare to distinguish between -- x < 4 < 3 and (x < 4) < 3 Modified: python/trunk/Python/ast.c ============================================================================== --- python/trunk/Python/ast.c (original) +++ python/trunk/Python/ast.c Fri Jan 27 16:18:39 2006 @@ -329,6 +329,19 @@ set_context(expr_ty e, expr_context_ty ctx, const node *n) { asdl_seq *s = NULL; + /* If a particular expression type can't be used for assign / delete, + set expr_name to its name and an error message will be generated. 
+ */ + const char* expr_name = NULL; + + /* The ast defines augmented store and load contexts, but the + implementation here doesn't actually use them. The code may be + a little more complex than necessary as a result. It also means + that expressions in an augmented assignment have no context. + Consider restructuring so that augmented assignment uses + set_context(), too + */ + assert(ctx != AugStore && ctx != AugLoad); switch (e->kind) { case Attribute_kind: @@ -358,30 +371,50 @@ e->v.Tuple.ctx = ctx; s = e->v.Tuple.elts; break; + case Lambda_kind: + expr_name = "lambda"; + break; case Call_kind: - if (ctx == Store) - return ast_error(n, "can't assign to function call"); - else if (ctx == Del) - return ast_error(n, "can't delete function call"); - else - return ast_error(n, "unexpected operation on function call"); + expr_name = "function call"; break; + case BoolOp_kind: case BinOp_kind: - return ast_error(n, "can't assign to operator"); + case UnaryOp_kind: + expr_name = "operator"; + break; case GeneratorExp_kind: - return ast_error(n, "assignment to generator expression " - "not possible"); + expr_name = "generator expression"; + break; + case ListComp_kind: + expr_name = "list comprehension"; + break; + case Dict_kind: case Num_kind: case Str_kind: - return ast_error(n, "can't assign to literal"); - default: { - char buf[300]; - PyOS_snprintf(buf, sizeof(buf), - "unexpected expression in assignment %d (line %d)", - e->kind, e->lineno); - return ast_error(n, buf); - } + expr_name = "literal"; + break; + case Compare_kind: + expr_name = "comparison"; + break; + case Repr_kind: + expr_name = "repr"; + break; + default: + PyErr_Format(PyExc_SystemError, + "unexpected expression in assignment %d (line %d)", + e->kind, e->lineno); + return 0; + } + /* Check for error string set by switch */ + if (expr_name) { + char buf[300]; + PyOS_snprintf(buf, sizeof(buf), + "can't %s %s", + ctx == Store ? 
"assign to" : "delete", + expr_name); + return ast_error(n, buf); } + /* If the LHS is a list or tuple, we need to set the assignment context for all the tuple elements. */ @@ -699,12 +732,8 @@ expr_ty name_expr; REQ(n, decorator); - - if ((NCH(n) < 3 && NCH(n) != 5 && NCH(n) != 6) - || TYPE(CHILD(n, 0)) != AT || TYPE(RCHILD(n, -1)) != NEWLINE) { - ast_error(n, "Invalid decorator node"); - return NULL; - } + REQ(CHILD(n, 0), AT); + REQ(RCHILD(n, -1), NEWLINE); name_expr = ast_for_dotted_name(c, CHILD(n, 1)); if (!name_expr) @@ -1610,7 +1639,7 @@ } } if (ngens > 1 || (ngens && (nargs || nkeywords))) { - ast_error(n, "Generator expression must be parenthesised " + ast_error(n, "Generator expression must be parenthesized " "if not sole argument"); return NULL; } @@ -1779,18 +1808,28 @@ if (!expr1) return NULL; - if (expr1->kind == GeneratorExp_kind) { - ast_error(ch, "augmented assignment to generator " - "expression not possible"); - return NULL; + // TODO(jhylton): Figure out why set_context() can't be used here. 
+ switch (expr1->kind) { + case GeneratorExp_kind: + ast_error(ch, "augmented assignment to generator " + "expression not possible"); + return NULL; + case Name_kind: { + const char *var_name = PyString_AS_STRING(expr1->v.Name.id); + if (var_name[0] == 'N' && !strcmp(var_name, "None")) { + ast_error(ch, "assignment to None"); + return NULL; + } + break; + } + case Attribute_kind: + case Subscript_kind: + break; + default: + ast_error(ch, "illegal expression for augmented " + "assignment"); + return NULL; } - if (expr1->kind == Name_kind) { - char *var_name = PyString_AS_STRING(expr1->v.Name.id); - if (var_name[0] == 'N' && !strcmp(var_name, "None")) { - ast_error(ch, "assignment to None"); - return NULL; - } - } ch = CHILD(n, 2); if (TYPE(ch) == testlist) From python-checkins at python.org Fri Jan 27 17:48:00 2006 From: python-checkins at python.org (phillip.eby) Date: Fri, 27 Jan 2006 17:48:00 +0100 (CET) Subject: [Python-checkins] r42195 - sandbox/trunk/setuptools/setuptools/command/easy_install.py Message-ID: <20060127164800.6F7AA1E4008@bag.python.org> Author: phillip.eby Date: Fri Jan 27 17:47:59 2006 New Revision: 42195 Modified: sandbox/trunk/setuptools/setuptools/command/easy_install.py Log: Fix missing import. 
Modified: sandbox/trunk/setuptools/setuptools/command/easy_install.py ============================================================================== --- sandbox/trunk/setuptools/setuptools/command/easy_install.py (original) +++ sandbox/trunk/setuptools/setuptools/command/easy_install.py Fri Jan 27 17:47:59 2006 @@ -889,6 +889,7 @@ def _expand(self, *attrs): config_vars = self.get_finalized_command('install').config_vars + from distutils.util import subst_vars for attr in attrs: val = getattr(self, attr) if val is not None: @@ -899,7 +900,6 @@ - def get_site_dirs(): # return a list of 'site' dirs, based on 'site' module's code to do this sitedirs = [] From python-checkins at python.org Sat Jan 28 00:55:21 2006 From: python-checkins at python.org (david.goodger) Date: Sat, 28 Jan 2006 00:55:21 +0100 (CET) Subject: [Python-checkins] r42196 - peps/trunk/pep.css Message-ID: <20060127235521.8E8B41E4002@bag.python.org> Author: david.goodger Date: Sat Jan 28 00:55:21 2006 New Revision: 42196 Modified: peps/trunk/pep.css Log: removed pale gray background, to spare Guido's poor tired eyes ;-) Modified: peps/trunk/pep.css ============================================================================== --- peps/trunk/pep.css (original) +++ peps/trunk/pep.css Sat Jan 28 00:55:21 2006 @@ -277,8 +277,7 @@ pre.literal-block, pre.doctest-block { margin-left: 2em ; - margin-right: 2em ; - background-color: #eeeeee } + margin-right: 2em } span.classifier { font-family: sans-serif ; @@ -341,8 +340,5 @@ h4 tt.docutils, h5 tt.docutils, h6 tt.docutils { font-size: 100% } -tt.docutils { - background-color: #eeeeee } - ul.auto-toc { list-style-type: none } From agorjia at yahoo.com Sat Jan 28 04:41:53 2006 From: agorjia at yahoo.com (Gorj Gorjia) Date: Fri, 27 Jan 2006 19:41:53 -0800 (PST) Subject: [Python-checkins] ..:: Don't loose to see ::..:: A Great address for you ::.. 
Message-ID: <20060128034153.60214.qmail@web37103.mail.mud.yahoo.com> Free of Virus,Worm, Trojan and every infected program. Hi to all. It's late if you loose this chance. get a free Acer TeravelMate Notebook Make free Salary by Greenhors ticker bar Visit this two Address for more Information http://www.geocities.com/abreg5/ http://www.geocities.com/softgoware/ __________________________________________________ Do You Yahoo!? Tired of spam? Yahoo! Mail has the best spam protection around http://mail.yahoo.com __________________________________________________ Do You Yahoo!? Tired of spam? Yahoo! Mail has the best spam protection around http://mail.yahoo.com From python-checkins at python.org Sat Jan 28 13:05:54 2006 From: python-checkins at python.org (thomas.wouters) Date: Sat, 28 Jan 2006 13:05:54 +0100 (CET) Subject: [Python-checkins] r42197 - python/trunk/Lib/test/test_pty.py Message-ID: <20060128120554.C98691E4003@bag.python.org> Author: thomas.wouters Date: Sat Jan 28 13:05:54 2006 New Revision: 42197 Modified: python/trunk/Lib/test/test_pty.py Log: Work around a Solaris peculiarity that caused test_pty to sometimes fail: a tty opened by os.openpty() isn't always a tty according to os.isatty(), when it's tested inside the process that opened it. Doesn't affect actual functionality, as using a tty this way is rarely, if ever, useful. Ignoring the failure allows the test for actual functionality to continue. Will backport to 2.4-maint. Modified: python/trunk/Lib/test/test_pty.py ============================================================================== --- python/trunk/Lib/test/test_pty.py (original) +++ python/trunk/Lib/test/test_pty.py Sat Jan 28 13:05:54 2006 @@ -4,6 +4,13 @@ TEST_STRING_1 = "I wish to buy a fish license.\n" TEST_STRING_2 = "For my pet fish, Eric.\n" +# Solaris (at least 2.9 and 2.10) seem to have a ficke isatty(). The first +# test below, testing the result of os.openpty() for tty-ness, sometimes +# (but not always) fails. 
The second isatty test, in the sub-process, always +# works. Allow that fickle first test to fail on these platforms, since it +# doesn't actually affect functionality. +fickle_isatty = ["sunos5"] + if verbose: def debug(msg): print msg @@ -26,7 +33,7 @@ # " An optional feature could not be imported " ... ? raise TestSkipped, "Pseudo-terminals (seemingly) not functional." - if not os.isatty(slave_fd): + if not os.isatty(slave_fd) and sys.platform not in fickle_isatty: raise TestFailed, "slave_fd is not a tty" # IRIX apparently turns \n into \r\n. Allow that, but avoid allowing other From python-checkins at python.org Sat Jan 28 13:08:39 2006 From: python-checkins at python.org (thomas.wouters) Date: Sat, 28 Jan 2006 13:08:39 +0100 (CET) Subject: [Python-checkins] r42198 - python/branches/release24-maint/Lib/test/test_pty.py Message-ID: <20060128120839.B740D1E4003@bag.python.org> Author: thomas.wouters Date: Sat Jan 28 13:08:39 2006 New Revision: 42198 Modified: python/branches/release24-maint/Lib/test/test_pty.py Log: Work around a Solaris peculiarity that caused test_pty to sometimes fail: a tty opened by os.openpty() isn't always a tty according to os.isatty(), when it's tested inside the process that opened it. Doesn't affect actual functionality, as using a tty this way is rarely, if ever, useful. Ignoring the failure allows the test for actual functionality to continue. Modified: python/branches/release24-maint/Lib/test/test_pty.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_pty.py (original) +++ python/branches/release24-maint/Lib/test/test_pty.py Sat Jan 28 13:08:39 2006 @@ -4,6 +4,13 @@ TEST_STRING_1 = "I wish to buy a fish license.\n" TEST_STRING_2 = "For my pet fish, Eric.\n" +# Solaris (at least 2.9 and 2.10) seem to have a fickle isatty(). The first +# test below, testing the result of os.openpty() for tty-ness, sometimes +# (but not always) fails.
The second isatty test, in the sub-process, always +# works. Allow that fickle first test to fail on these platforms, since it +# doesn't actually affect functionality. +fickle_isatty = ["sunos5"] + if verbose: def debug(msg): print msg @@ -26,7 +33,7 @@ # " An optional feature could not be imported " ... ? raise TestSkipped, "Pseudo-terminals (seemingly) not functional." - if not os.isatty(slave_fd): + if not os.isatty(slave_fd) and sys.platform not in fickle_isatty: raise TestFailed, "slave_fd is not a tty" # IRIX apparently turns \n into \r\n. Allow that, but avoid allowing other From python-checkins at python.org Sun Jan 29 10:53:46 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 29 Jan 2006 10:53:46 +0100 (CET) Subject: [Python-checkins] r42199 - python/trunk/aclocal.m4 python/trunk/configure python/trunk/configure.in Message-ID: <20060129095346.0241D1E4003@bag.python.org> Author: martin.v.loewis Date: Sun Jan 29 10:53:44 2006 New Revision: 42199 Removed: python/trunk/aclocal.m4 Modified: python/trunk/configure python/trunk/configure.in Log: Delete aclocal.m4, and require autoconf 2.59, fixing #811160 in a different way. Deleted: /python/trunk/aclocal.m4 ============================================================================== --- /python/trunk/aclocal.m4 Sun Jan 29 10:53:44 2006 +++ (empty file) @@ -1,57 +0,0 @@ -# Code swiped wholesale from the GCC project, see -# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12100 - -# This file can go away once autoconf 2.58 is out and being used - -# it's reported that this is fixed in the autoconf cvs already. - -# AC_LANG_FUNC_LINK_TRY(C)(FUNCTION) -# ---------------------------------- -# Don't include because on OSF/1 3.0 it includes -# which includes which contains a -# prototype for select. Similarly for bzero. 
-# -# A similar problem afflicts HP/UX, but it also hits -# -# This test used to merely assign f=$1 in main(), but that was -# optimized away by HP unbundled cc A.05.36 for ia64 under +O3, -# presumably on the basis that there's no need to do that store if the -# program is about to exit. Conversely, the AIX linker optimizes an -# unused external declaration that initializes f=$1. So this test -# program has both an external initialization of f, and a use of f in -# main that affects the exit status. -# -m4_define([AC_LANG_FUNC_LINK_TRY(C)], -[AC_LANG_PROGRAM( -[/* System header to define __stub macros and hopefully few prototypes, - which can conflict with char $1 (); below. - Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) -# include -#else -# include -#endif -/* Override any gcc2 internal prototype to avoid an error. */ -#ifdef __cplusplus -extern "C" -{ -#endif -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $1 (); -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$1) || defined (__stub___$1) -choke me -#else -char (*f) () = $1; -#endif -#ifdef __cplusplus -} -#endif -], [return f != $1;])]) - - Modified: python/trunk/configure ============================================================================== --- python/trunk/configure (original) +++ python/trunk/configure Sun Jan 29 10:53:44 2006 @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 41984 . +# From configure.in Revision: 42046 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.5. 
# @@ -11876,17 +11876,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define pthread_detach to an innocuous variant, in case declares pthread_detach. + For example, HP-UX 11i declares gettimeofday. */ +#define pthread_detach innocuous_pthread_detach + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char pthread_detach (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef pthread_detach + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -12846,17 +12852,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -13380,17 +13392,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. 
*/ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -13615,17 +13633,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -14664,17 +14688,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. 
Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -14906,17 +14936,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -15155,17 +15191,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. 
*/ #ifdef __cplusplus extern "C" @@ -15256,17 +15298,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -15362,17 +15410,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -15507,17 +15561,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. 
*/ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -15652,17 +15712,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -16073,17 +16139,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. 
Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -18005,17 +18077,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define gethostbyname_r to an innocuous variant, in case declares gethostbyname_r. + For example, HP-UX 11i declares gettimeofday. */ +#define gethostbyname_r innocuous_gethostbyname_r + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char gethostbyname_r (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef gethostbyname_r + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -18319,17 +18397,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. 
*/ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -18424,17 +18508,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define __fpu_control to an innocuous variant, in case declares __fpu_control. + For example, HP-UX 11i declares gettimeofday. */ +#define __fpu_control innocuous___fpu_control + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char __fpu_control (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef __fpu_control + /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" @@ -18678,17 +18768,23 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. Under hpux, - including includes and causes problems - checking for functions defined therein. */ -#if defined (__STDC__) && !defined (_HPUX_SOURCE) + exists even on freestanding compilers. */ + +#ifdef __STDC__ # include #else # include #endif + +#undef $ac_func + /* Override any gcc2 internal prototype to avoid an error. 
*/ #ifdef __cplusplus extern "C" Modified: python/trunk/configure.in ============================================================================== --- python/trunk/configure.in (original) +++ python/trunk/configure.in Sun Jan 29 10:53:44 2006 @@ -4,7 +4,7 @@ m4_define(PYTHON_VERSION, 2.5) AC_REVISION($Revision$) -AC_PREREQ(2.53) +AC_PREREQ(2.59) AC_INIT(python, PYTHON_VERSION, http://www.python.org/python-bugs) AC_CONFIG_SRCDIR([Include/object.h]) AC_CONFIG_HEADER(pyconfig.h) From python-checkins at python.org Sun Jan 29 20:46:31 2006 From: python-checkins at python.org (gregory.p.smith) Date: Sun, 29 Jan 2006 20:46:31 +0100 (CET) Subject: [Python-checkins] r42200 - python/trunk/Misc/NEWS Message-ID: <20060129194631.F14031E4003@bag.python.org> Author: gregory.p.smith Date: Sun Jan 29 20:46:23 2006 New Revision: 42200 Modified: python/trunk/Misc/NEWS Log: add bsddb.db.DBEnv.set_tx_timestamp mention Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Sun Jan 29 20:46:23 2006 @@ -216,6 +216,8 @@ Extension Modules ----------------- +- Add bsddb.db.DBEnv.set_tx_timestamp allowing time based database recovery. + - Bug #1413192, fix seg fault in bsddb if a transaction was deleted before the env. From python-checkins at python.org Sun Jan 29 20:55:20 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 29 Jan 2006 20:55:20 +0100 (CET) Subject: [Python-checkins] r42201 - python/trunk/Lib/test/test_logging.py Message-ID: <20060129195520.EA25F1E401F@bag.python.org> Author: martin.v.loewis Date: Sun Jan 29 20:55:18 2006 New Revision: 42201 Modified: python/trunk/Lib/test/test_logging.py Log: Explicitly close the server socket. 
Modified: python/trunk/Lib/test/test_logging.py ============================================================================== --- python/trunk/Lib/test/test_logging.py (original) +++ python/trunk/Lib/test/test_logging.py Sun Jan 29 20:55:18 2006 @@ -109,6 +109,8 @@ abort = self.abort #notify the main thread that we're about to exit socketDataProcessed.set() + # close the listen socket + self.server_close() def process_request(self, request, client_address): #import threading From python-checkins at python.org Sun Jan 29 21:10:40 2006 From: python-checkins at python.org (martin.v.loewis) Date: Sun, 29 Jan 2006 21:10:40 +0100 (CET) Subject: [Python-checkins] r42202 - python/trunk/Lib/test/test_logging.py Message-ID: <20060129201040.EA5631E4008@bag.python.org> Author: martin.v.loewis Date: Sun Jan 29 21:10:38 2006 New Revision: 42202 Modified: python/trunk/Lib/test/test_logging.py Log: Try a number of ports, in case 9020 is already in use. Modified: python/trunk/Lib/test/test_logging.py ============================================================================== --- python/trunk/Lib/test/test_logging.py (original) +++ python/trunk/Lib/test/test_logging.py Sun Jan 29 21:10:38 2006 @@ -565,11 +565,23 @@ hdlr.setFormatter(fmt) rootLogger.addHandler(hdlr) + # Find an unused port number + port = logging.handlers.DEFAULT_TCP_LOGGING_PORT + while port < logging.handlers.DEFAULT_TCP_LOGGING_PORT+100: + try: + tcpserver = LogRecordSocketReceiver(port=port) + except socket.error: + port += 1 + else: + break + else: + raise ImportError, "Could not find unused port" + + #Set up a handler such that all events are sent via a socket to the log #receiver (logrecv). #The handler will only be added to the rootLogger for some of the tests - shdlr = logging.handlers.SocketHandler('localhost', - logging.handlers.DEFAULT_TCP_LOGGING_PORT) + shdlr = logging.handlers.SocketHandler('localhost', port) #Configure the logger for logrecv so events do not propagate beyond it. 
#The sockLogger output is buffered in memory until the end of the test, @@ -585,7 +597,6 @@ #Set up servers threads = [] - tcpserver = LogRecordSocketReceiver() #sys.stdout.write("About to start TCP server...\n") threads.append(threading.Thread(target=runTCP, args=(tcpserver,))) From python-checkins at python.org Sun Jan 29 23:50:27 2006 From: python-checkins at python.org (tim.peters) Date: Sun, 29 Jan 2006 23:50:27 +0100 (CET) Subject: [Python-checkins] r42203 - python/trunk/Lib/test/test_logging.py Message-ID: <20060129225027.BE11D1E4004@bag.python.org> Author: tim.peters Date: Sun Jan 29 23:50:26 2006 New Revision: 42203 Modified: python/trunk/Lib/test/test_logging.py Log: Whitespace normalization. Modified: python/trunk/Lib/test/test_logging.py ============================================================================== --- python/trunk/Lib/test/test_logging.py (original) +++ python/trunk/Lib/test/test_logging.py Sun Jan 29 23:50:26 2006 @@ -576,7 +576,7 @@ break else: raise ImportError, "Could not find unused port" - + #Set up a handler such that all events are sent via a socket to the log #receiver (logrecv). From python-checkins at python.org Mon Jan 30 00:54:40 2006 From: python-checkins at python.org (gregory.p.smith) Date: Mon, 30 Jan 2006 00:54:40 +0100 (CET) Subject: [Python-checkins] r42204 - python/trunk/Lib/bsddb/test/test_1413192.py Message-ID: <20060129235440.53FE71E4003@bag.python.org> Author: gregory.p.smith Date: Mon Jan 30 00:54:38 2006 New Revision: 42204 Modified: python/trunk/Lib/bsddb/test/test_1413192.py Log: fix test import for use in standalone pybsddb project bsddb3 module as well as python builtin bsddb. 
Modified: python/trunk/Lib/bsddb/test/test_1413192.py ============================================================================== --- python/trunk/Lib/bsddb/test/test_1413192.py (original) +++ python/trunk/Lib/bsddb/test/test_1413192.py Mon Jan 30 00:54:38 2006 @@ -4,7 +4,12 @@ # This test relies on the variable names, see the bug report for details. # The problem was that the env was deallocated prior to the txn. -from bsddb import db +try: + # For Pythons w/distutils and add-on pybsddb + from bsddb3 import db +except ImportError: + # For Python >= 2.3 builtin bsddb distribution + from bsddb import db env_name = '.' From python-checkins at python.org Mon Jan 30 01:22:11 2006 From: python-checkins at python.org (gregory.p.smith) Date: Mon, 30 Jan 2006 01:22:11 +0100 (CET) Subject: [Python-checkins] r42205 - python/trunk/Modules/_bsddb.c Message-ID: <20060130002211.406781E4003@bag.python.org> Author: gregory.p.smith Date: Mon Jan 30 01:22:08 2006 New Revision: 42205 Modified: python/trunk/Modules/_bsddb.c Log: maintain support for older python versions in this module so that it is ok for a standalone pybsddb source dist for use with <= 2.3. 
Modified: python/trunk/Modules/_bsddb.c ============================================================================== --- python/trunk/Modules/_bsddb.c (original) +++ python/trunk/Modules/_bsddb.c Mon Jan 30 01:22:08 2006 @@ -1537,11 +1537,19 @@ keyObj = PyInt_FromLong(*(int *)key.data); else keyObj = PyString_FromStringAndSize(key.data, key.size); +#if (PY_VERSION_HEX >= 0x02040000) retval = PyTuple_Pack(3, keyObj, pkeyObj, dataObj); +#else + retval = Py_BuildValue("OOO", keyObj, pkeyObj, dataObj); +#endif } else /* return just the pkey and data */ { +#if (PY_VERSION_HEX >= 0x02040000) retval = PyTuple_Pack(2, pkeyObj, dataObj); +#else + retval = Py_BuildValue("OO", pkeyObj, dataObj); +#endif } FREE_DBT(pkey); FREE_DBT(data); @@ -3187,12 +3195,20 @@ keyObj = PyInt_FromLong(*(int *)key.data); else keyObj = PyString_FromStringAndSize(key.data, key.size); +#if (PY_VERSION_HEX >= 0x02040000) retval = PyTuple_Pack(3, keyObj, pkeyObj, dataObj); +#else + retval = Py_BuildValue("OOO", keyObj, pkeyObj, dataObj); +#endif FREE_DBT(key); } else /* return just the pkey and data */ { +#if (PY_VERSION_HEX >= 0x02040000) retval = PyTuple_Pack(2, pkeyObj, dataObj); +#else + retval = Py_BuildValue("OO", pkeyObj, dataObj); +#endif } FREE_DBT(pkey); FREE_DBT(data); From python-checkins at python.org Mon Jan 30 16:04:33 2006 From: python-checkins at python.org (martin.v.loewis) Date: Mon, 30 Jan 2006 16:04:33 +0100 (CET) Subject: [Python-checkins] r42206 - python/trunk/Misc/NEWS Message-ID: <20060130150433.028031E4003@bag.python.org> Author: martin.v.loewis Date: Mon Jan 30 16:04:31 2006 New Revision: 42206 Modified: python/trunk/Misc/NEWS Log: Fix typo. 
Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Mon Jan 30 16:04:31 2006 @@ -347,7 +347,7 @@ Library ------- -- Patch #1349118: urllib2 now supports user:pass@ style proxy +- Patch #1349118: urllib now supports user:pass@ style proxy specifications, raises IOErrors when proxies for unsupported protocols are defined, and uses the https proxy on https redirections. From python-checkins at python.org Tue Jan 31 15:28:31 2006 From: python-checkins at python.org (andrew.kuchling) Date: Tue, 31 Jan 2006 15:28:31 +0100 (CET) Subject: [Python-checkins] r42207 - sandbox/trunk/pycon/Makefile Message-ID: <20060131142831.41C681E4004@bag.python.org> Author: andrew.kuchling Date: Tue Jan 31 15:28:30 2006 New Revision: 42207 Added: sandbox/trunk/pycon/Makefile Log: Add makefile Added: sandbox/trunk/pycon/Makefile ============================================================================== --- (empty file) +++ sandbox/trunk/pycon/Makefile Tue Jan 31 15:28:30 2006 @@ -0,0 +1,4 @@ + +html: + ./parse-sched.py --format=html schedule.html + From python-checkins at python.org Tue Jan 31 16:25:27 2006 From: python-checkins at python.org (david.goodger) Date: Tue, 31 Jan 2006 16:25:27 +0100 (CET) Subject: [Python-checkins] r42208 - sandbox/trunk/pycon/parse-sched.py Message-ID: <20060131152527.0A6971E4004@bag.python.org> Author: david.goodger Date: Tue Jan 31 16:25:23 2006 New Revision: 42208 Modified: sandbox/trunk/pycon/parse-sched.py Log: parameterized parse(); partially fixed plenary logic; added talk number to rendered title; whitespace Modified: sandbox/trunk/pycon/parse-sched.py ============================================================================== --- sandbox/trunk/pycon/parse-sched.py (original) +++ sandbox/trunk/pycon/parse-sched.py Tue Jan 31 16:25:23 2006 @@ -3,7 +3,7 @@ # Reads a page with a wiki-format table # Basic data structure of 
dictionary: -# {(year, month, day) -> [(time, duration, title)] +# {(year, month, day) -> [(room, time, duration, title)] import sys, optparse import re, string @@ -16,18 +16,17 @@ line_pat = re.compile('[|]{2}.*[|]{2}\s*$') talk_pat = re.compile('#(\d+)') -def parse (): - lines = sys.stdin.readlines() +def parse (lines): lines = map(string.strip, lines) d = {} date = None - + for line in lines: m = date_pat.match(line) if m: date = [int(value) for value in m.group(1,2,3)] date = tuple(date) - + m = line_pat.match(line) if m: if date is None: @@ -71,7 +70,7 @@ L[1] = '%02i:%02i' % (hour, min) L[2] = int(L[2]) return tuple(L) - + # @@ -98,8 +97,8 @@ if span == len(time_list): return 1 return span+1 - - + + def format_day (day, output): # Figure out unique rooms rooms = [] @@ -127,7 +126,7 @@ print >>output, '' % room, print >>output, '' print >>output, "" - + # Sort list time_dict = {} for room, time, duration, title in day: @@ -145,7 +144,7 @@ if end_time <= time: del active[act_room] print >>output, '' % time, - plenary = (len(room_dict) == 1) + plenary = (len(room_dict) == 1 and room_dict.keys()[0] == '---') if plenary: # Plenary session of some sort duration, title = room_dict.values()[0] @@ -153,7 +152,7 @@ print >>output, '' % (colspan, title), print >>output, '' continue - + for room in rooms: # Room still occupied, so skip it if room in active: @@ -174,13 +173,17 @@ if m is not None: talk_num = int(m.group(1)) title = talks.get_title(talk_num) + url = ('http://wiki.python.org/moin/PyCon2006/Talks#%s' + % talk_num) + title = '%s (%s)' % (cgi.escape(title), + url, talk_num) else: title = cgi.escape(title) - + print >>output, '', - + print >>output, '' print '' @@ -195,7 +198,7 @@ print >>output, date.strftime('

    %A, %B %d %Y

    ') format_day(day_data, output) - + def main (): parser = optparse.OptionParser(usage="usage: %prog [options] < final-schedule") parser.add_option('--format', @@ -206,7 +209,7 @@ help = "Select output format") options, args = parser.parse_args() - d = parse() + d = parse(lines=sys.stdin.readlines()) fmt = options.format if fmt == 'print': pprint.pprint(d) @@ -221,12 +224,6 @@ else: print >>sys.stderr, "Unknown format %r" % fmt sys.exit(1) - + if __name__ == '__main__': main() - - - - - - From python-checkins at python.org Tue Jan 31 16:25:41 2006 From: python-checkins at python.org (david.goodger) Date: Tue, 31 Jan 2006 16:25:41 +0100 (CET) Subject: [Python-checkins] r42209 - sandbox/trunk/pycon/talks.py Message-ID: <20060131152541.254BA1E4004@bag.python.org> Author: david.goodger Date: Tue Jan 31 16:25:40 2006 New Revision: 42209 Modified: sandbox/trunk/pycon/talks.py Log: get_title just returns the title -- HTML processing elsewhere Modified: sandbox/trunk/pycon/talks.py ============================================================================== --- sandbox/trunk/pycon/talks.py (original) +++ sandbox/trunk/pycon/talks.py Tue Jan 31 16:25:40 2006 @@ -57,14 +57,8 @@ 73: 'State of Zope', } -import cgi - def get_title (num): title = talk_dict.get(num) if title is None: return '#' + str(num) - - url = 'http://wiki.python.org/moin/PyCon2006/Talks#' + str(num) - title = cgi.escape(title) - return '%s' % (url, title) - + return title From python-checkins at python.org Tue Jan 31 16:26:21 2006 From: python-checkins at python.org (david.goodger) Date: Tue, 31 Jan 2006 16:26:21 +0100 (CET) Subject: [Python-checkins] r42210 - sandbox/trunk/pycon/parse-sched.py sandbox/trunk/pycon/parse_sched.py Message-ID: <20060131152621.F14991E4004@bag.python.org> Author: david.goodger Date: Tue Jan 31 16:26:21 2006 New Revision: 42210 Added: sandbox/trunk/pycon/parse_sched.py - copied unchanged from r42208, sandbox/trunk/pycon/parse-sched.py Removed: 
sandbox/trunk/pycon/parse-sched.py Log: renamed to parse_sched to make importable Deleted: /sandbox/trunk/pycon/parse-sched.py ============================================================================== --- /sandbox/trunk/pycon/parse-sched.py Tue Jan 31 16:26:21 2006 +++ (empty file) @@ -1,229 +0,0 @@ -#!/usr/bin/env python - -# Reads a page with a wiki-format table - -# Basic data structure of dictionary: -# {(year, month, day) -> [(room, time, duration, title)] - -import sys, optparse -import re, string -import pprint, cgi -import datetime - -import talks - -date_pat = re.compile('^=\s+(\d{4})-(\d{2})-(\d{1,2})\s+.*\s+=') -line_pat = re.compile('[|]{2}.*[|]{2}\s*$') -talk_pat = re.compile('#(\d+)') - -def parse (lines): - lines = map(string.strip, lines) - d = {} - date = None - - for line in lines: - m = date_pat.match(line) - if m: - date = [int(value) for value in m.group(1,2,3)] - date = tuple(date) - - m = line_pat.match(line) - if m: - if date is None: - print >>sys.stderr, "Table line before date header: %r" % line - else: - L = line.split('||') - L = map(string.strip, L) - assert L[0] == '' - assert L[-1] == '' - L = L[1:-1] - if len(L) != 4: - print >>sys.stderr, "Wrong number of fields in line: %r" % line - # Skip headers - elif L[0].lower() == 'room': - pass - else: - event_list = d.setdefault(date, []) - t = canonicalize_presentation(L) - event_list.append(t) - - return d - - -time_pat = re.compile('(\d+):(\d+)') - -def parse_time (S): - """(str): (int, int) - Parse a time into an (hour, minute) tuple. - """ - m = time_pat.match(S) - assert m is not None - hour, min = int(m.group(1)), int(m.group(2)) - return hour, min - - -def canonicalize_presentation (L): - """Take a 4-item list for a talk and perform various - corrections to it. 
- """ - hour, min = parse_time(L[1]) - L[1] = '%02i:%02i' % (hour, min) - L[2] = int(L[2]) - return tuple(L) - - - -# -# HTML generation functions -# - -def add_time (start_time, duration): - hour, min = parse_time(start_time) - while duration > 60: - hour += 1 - duration -= 60 - min += duration - while min >= 60: - hour += 1 - min -= 60 - return '%02i:%02i' % (hour, min) - -def find_next_time (time_list, end_time): - if len(time_list) == 0: - return 1 - span = 0 - while (span < len(time_list) and time_list[span][0] < end_time): - span += 1 - if span == len(time_list): - return 1 - return span+1 - - -def format_day (day, output): - # Figure out unique rooms - rooms = [] - for room, time, duration, title in day: - # XXX change to use regex pattern - if room.startswith('-'): - continue - if room not in rooms: - rooms.append(room) - rooms.sort() - - # Move Bent Tree to be last - if 'Bent Tree' in rooms: - rooms.remove('Bent Tree') - rooms.append('Bent Tree') - - # Print room header - print >>output, '''
    %s
    %s%s
    ' % rowspan, print >>output, title, print >>output, '
    --''' % len(rooms) - - print >>output, "" - print >>output, '', - for room in rooms: - print >>output, '' % room, - print >>output, '' - print >>output, "" - - # Sort list - time_dict = {} - for room, time, duration, title in day: - d = time_dict.setdefault(time, {}) - d[room] = (duration, title) - time_list = time_dict.items() - time_list.sort() - - active = {} - print '' - while len(time_list) > 0: - time, room_dict = time_list.pop(0) - print >>output, '', - for act_room, end_time in active.items(): - if end_time <= time: - del active[act_room] - print >>output, '' % time, - plenary = (len(room_dict) == 1 and room_dict.keys()[0] == '---') - if plenary: - # Plenary session of some sort - duration, title = room_dict.values()[0] - colspan = len(rooms) - print >>output, '' % (colspan, title), - print >>output, '' - continue - - for room in rooms: - # Room still occupied, so skip it - if room in active: - continue - - # New room - t = room_dict.get(room) - if t is None: - print >>output, '', - else: - duration, title = t - end_time = add_time(time, duration) - active[room] = end_time - rowspan = find_next_time(time_list, end_time) - - # Turn talk numbers into a link with a title - m = talk_pat.match(title) - if m is not None: - talk_num = int(m.group(1)) - title = talks.get_title(talk_num) - url = ('http://wiki.python.org/moin/PyCon2006/Talks#%s' - % talk_num) - title = '%s (%s)' % (cgi.escape(title), - url, talk_num) - else: - title = cgi.escape(title) - - print >>output, '', - - print >>output, '' - - print '' - print >>output, '
    %s
    %s%s
     ' % rowspan, - print >>output, title, - print >>output, '
    ' - - -def output_html (d, output): - L = d.items() ; L.sort() - - for (y, m, day), day_data in L: - date = datetime.date(y, m, day) - print >>output, date.strftime('

    %A, %B %d %Y

    ') - format_day(day_data, output) - - -def main (): - parser = optparse.OptionParser(usage="usage: %prog [options] < final-schedule") - parser.add_option('--format', - type='choice', - choices=['pickle', 'python', 'print', 'html'], - default='print', - action="store", dest="format", - help = "Select output format") - options, args = parser.parse_args() - - d = parse(lines=sys.stdin.readlines()) - fmt = options.format - if fmt == 'print': - pprint.pprint(d) - elif fmt == 'python': - print 'schedule =', - pprint.pprint(d) - elif fmt == 'pickle': - import cPickle - cPickle.dump(d, sys.stdout) - elif fmt == 'html': - output_html(d, sys.stdout) - else: - print >>sys.stderr, "Unknown format %r" % fmt - sys.exit(1) - -if __name__ == '__main__': - main() From python-checkins at python.org Tue Jan 31 16:27:25 2006 From: python-checkins at python.org (david.goodger) Date: Tue, 31 Jan 2006 16:27:25 +0100 (CET) Subject: [Python-checkins] r42211 - sandbox/trunk/pycon/Makefile sandbox/trunk/pycon/README Message-ID: <20060131152725.793641E4004@bag.python.org> Author: david.goodger Date: Tue Jan 31 16:27:24 2006 New Revision: 42211 Modified: sandbox/trunk/pycon/Makefile sandbox/trunk/pycon/README Log: updated for renamed parse_sched.py Modified: sandbox/trunk/pycon/Makefile ============================================================================== --- sandbox/trunk/pycon/Makefile (original) +++ sandbox/trunk/pycon/Makefile Tue Jan 31 16:27:24 2006 @@ -1,4 +1,4 @@ html: - ./parse-sched.py --format=html <FinalSchedule >schedule.html + ./parse_sched.py --format=html <FinalSchedule >schedule.html Modified: sandbox/trunk/pycon/README ============================================================================== --- sandbox/trunk/pycon/README (original) +++ sandbox/trunk/pycon/README Tue Jan 31 16:27:24 2006 @@ -6,13 +6,13 @@ get.sh Fetches the source of the PyCon2006/FinalSchedule wiki page and saves it as FinalSchedule. 
    (Uses GNU wget) -parse-sched.py Reads the source of the FinalSchedule page from stdin, +parse_sched.py Reads the source of the FinalSchedule page from stdin, and can output the schedule as HTML or a Python data structure. Run get.sh once; then invoke the script with something like: - ./parse-sched.py --format=html <FinalSchedule >schedule.html + ./parse_sched.py --format=html <FinalSchedule >schedule.html Please feel free to submit patches that add new output formats. If you have commit privileges, feel free to just check in changes that add From python-checkins at python.org Tue Jan 31 19:34:18 2006 From: python-checkins at python.org (gustavo.niemeyer) Date: Tue, 31 Jan 2006 19:34:18 +0100 (CET) Subject: [Python-checkins] r42212 - in python/trunk: Lib/difflib.py Lib/test/test_difflib.py Misc/NEWS Message-ID: <20060131183418.BFDD31E4004@bag.python.org> Author: gustavo.niemeyer Date: Tue Jan 31 19:34:13 2006 New Revision: 42212 Modified: python/trunk/Lib/difflib.py python/trunk/Lib/test/test_difflib.py python/trunk/Misc/NEWS Log: Patch #1413711: Certain patterns of differences were making difflib touch the recursion limit. The applied patch inlines the recursive __helper method in a non-recursive way. 
Modified: python/trunk/Lib/difflib.py ============================================================================== --- python/trunk/Lib/difflib.py (original) +++ python/trunk/Lib/difflib.py Tue Jan 31 19:34:13 2006 @@ -473,26 +473,31 @@ if self.matching_blocks is not None: return self.matching_blocks - self.matching_blocks = [] la, lb = len(self.a), len(self.b) - self.__helper(0, la, 0, lb, self.matching_blocks) - self.matching_blocks.append( (la, lb, 0) ) - return self.matching_blocks - # builds list of matching blocks covering a[alo:ahi] and - # b[blo:bhi], appending them in increasing order to answer + indexed_blocks = [] + queue = [(0, la, 0, lb)] + while queue: + # builds list of matching blocks covering a[alo:ahi] and + # b[blo:bhi], appending them in increasing order to answer + alo, ahi, blo, bhi = queue.pop() + + # a[alo:i] vs b[blo:j] unknown + # a[i:i+k] same as b[j:j+k] + # a[i+k:ahi] vs b[j+k:bhi] unknown + i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi) + + if k: + if alo < i and blo < j: + queue.append((alo, i, blo, j)) + indexed_blocks.append((i, x)) + if i+k < ahi and j+k < bhi: + queue.append((i+k, ahi, j+k, bhi)) + indexed_blocks.sort() - def __helper(self, alo, ahi, blo, bhi, answer): - i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi) - # a[alo:i] vs b[blo:j] unknown - # a[i:i+k] same as b[j:j+k] - # a[i+k:ahi] vs b[j+k:bhi] unknown - if k: - if alo < i and blo < j: - self.__helper(alo, i, blo, j, answer) - answer.append(x) - if i+k < ahi and j+k < bhi: - self.__helper(i+k, ahi, j+k, bhi, answer) + self.matching_blocks = [elem[1] for elem in indexed_blocks] + self.matching_blocks.append( (la, lb, 0) ) + return self.matching_blocks def get_opcodes(self): """Return list of 5-tuples describing how to turn a into b. 
Modified: python/trunk/Lib/test/test_difflib.py ============================================================================== --- python/trunk/Lib/test/test_difflib.py (original) +++ python/trunk/Lib/test/test_difflib.py Tue Jan 31 19:34:13 2006 @@ -2,6 +2,7 @@ from test.test_support import run_unittest, findfile import unittest import doctest +import sys class TestSFbugs(unittest.TestCase): @@ -143,6 +144,14 @@ self.assertEqual(actual,expect) + def test_recursion_limit(self): + # Check if the problem described in patch #1413711 exists. + limit = sys.getrecursionlimit() + old = [(i%2 and "K:%d" or "V:A:%d") % i for i in range(limit*2)] + new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)] + difflib.SequenceMatcher(None, old, new).get_opcodes() + + Doctests = doctest.DocTestSuite(difflib) run_unittest(TestSFpatches, TestSFbugs, Doctests) Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Tue Jan 31 19:34:13 2006 @@ -676,6 +676,9 @@ - ` uu.encode()`` and ``uu.decode()`` now support unicode filenames. +- Patch #1413711: Certain patterns of differences were making difflib + touch the recursion limit. + Build -----